In [None]:
import os
import csv
import math

In [None]:

def _read_csv_rows(file_path, encoding):
    """Read a CSV file and return ``(headers, rows)``.

    The first record is taken as the header row. Data rows whose column
    count differs from the header are reported on stdout and skipped.

    Args:
        file_path: Path of the CSV file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        A ``(headers, rows)`` tuple. ``headers`` is ``None`` when the file
        contains no records at all.

    Raises:
        UnicodeDecodeError: If the file cannot be decoded with ``encoding``.
        OSError: If the file cannot be opened or read.
    """
    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields that contain embedded newlines.
    with open(file_path, 'r', encoding=encoding, newline='') as f:
        reader = csv.reader(f)
        headers = next(reader, None)
        if headers is None:
            return None, []

        rows = []
        for row in reader:
            if len(row) == len(headers):
                rows.append(row)
            else:
                print(f"Skipping row with incorrect column count: {row}")
    return headers, rows


def load_data(file_path):
    """Load a CSV file into a ``{"headers": [...], "data": [[...], ...]}`` dict.

    The file is first decoded as UTF-8 (a leading byte-order mark is
    stripped so it does not end up inside the first header name); if that
    fails with a decoding error, the file is re-read as latin1.

    Args:
        file_path: Path of the CSV file to load.

    Returns:
        A dict with the header row under ``"headers"`` and the well-formed
        data rows (lists of strings) under ``"data"``, or ``None`` when the
        file is missing, empty, or cannot be read. Problems are reported on
        stdout rather than raised.
    """
    if not os.path.exists(file_path):
        print(f"Error: File not found at {file_path}")
        return None

    try:
        # 'utf-8-sig' behaves like 'utf-8' but drops a leading BOM if present.
        headers, data = _read_csv_rows(file_path, 'utf-8-sig')
        encoding_note = ""
    except UnicodeDecodeError:
        try:
            headers, data = _read_csv_rows(file_path, 'latin1')
            encoding_note = " using latin1 encoding"
        except Exception as e:
            print(f"Error loading data with latin1 encoding: {e}")
            return None
    except Exception as e:
        print(f"Error loading data: {e}")
        return None

    if headers is None:
        # No header row means there is nothing to build a dataset from.
        print(f"Error: File is empty: {file_path}")
        return None

    print(f"Successfully loaded {len(data)} rows with {len(headers)} columns{encoding_note}")
    return {"headers": headers, "data": data}

In [None]:

def _is_missing(value):
    """Return True for ``None`` and for empty or whitespace-only strings."""
    if value is None:
        return True
    if isinstance(value, str):
        return not value.strip()
    return False


def preprocess_data(data_dict):
    """Fill in missing values and convert numeric columns to ``float``.

    A column is treated as numeric when every non-missing value among the
    first 100 rows parses as a float (a column with no values in that
    sample is therefore treated as numeric). Missing cells become ``0.0``
    in numeric columns and ``"Unknown"`` in text columns; values in numeric
    columns that fail to parse also become ``0.0``.

    The rows in ``data_dict["data"]`` are modified in place. Rows shorter
    than the header are padded first, and values that are already numbers
    are accepted, so calling this function again on its own output is safe.

    Args:
        data_dict: Dict with ``"headers"`` (list of column names) and
            ``"data"`` (list of row lists).

    Returns:
        A dict with ``"headers"``, ``"data"`` (the same, now converted, row
        lists) and ``"numeric_columns"`` (one bool per header).
    """
    headers = data_dict["headers"]
    data = data_dict["data"]
    n_cols = len(headers)

    # Pad short rows so every header index is addressable below; the padded
    # cells are blank and get the normal missing-value treatment.
    for row in data:
        if len(row) < n_cols:
            row.extend([""] * (n_cols - len(row)))

    # Determine numeric columns from a sample of the first 100 rows.
    sample = data[:100]
    numeric_columns = []
    for col_idx in range(n_cols):
        is_numeric = True
        for row in sample:
            value = row[col_idx]
            if _is_missing(value):
                continue
            try:
                float(value)
            except (TypeError, ValueError):
                is_numeric = False
                break
        numeric_columns.append(is_numeric)

    # Handle missing values and convert types.
    for row in data:
        for col_idx, is_numeric in enumerate(numeric_columns):
            value = row[col_idx]
            if _is_missing(value):
                row[col_idx] = 0.0 if is_numeric else "Unknown"
            elif is_numeric:
                try:
                    row[col_idx] = float(value)
                except (TypeError, ValueError):
                    # Values past the sampled rows may not be numeric.
                    row[col_idx] = 0.0

    return {"headers": headers, "data": data, "numeric_columns": numeric_columns}

In [None]:

def _as_float(value):
    """Convert ``value`` to float; blank or unparsable values become 0.0."""
    if value is None or value == "":
        return 0.0
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def calculate_similarity(restaurant1, restaurant2, headers, numeric_columns, weights=None):
    """Calculate a weighted similarity between two restaurant rows.

    Columns whose header contains 'id', 'name', 'address', 'phone' or 'url'
    (case-insensitive) are ignored. For every other column:

    * numeric columns score ``1 - |a - b| / max(|a|, |b|)``, clamped at 0 so
      that values of opposite sign cannot produce a negative score; two
      zeros count as a perfect match;
    * text columns score 1 on exact equality and 0 otherwise.

    Args:
        restaurant1: First row, indexable by column position.
        restaurant2: Second row, indexable by column position.
        headers: Column names, aligned with the rows.
        numeric_columns: One bool per header; True marks a numeric column.
        weights: Optional mapping of header name to weight. Headers absent
            from the mapping get weight 1.0. When omitted, numeric columns
            weigh 2.0 and text columns 1.0.

    Returns:
        The weighted average of the per-column scores, or 0.0 when no
        column carries any weight.
    """
    # Default weights - prioritize numeric columns
    if weights is None:
        weights = {
            header: 2.0 if numeric_columns[i] else 1.0
            for i, header in enumerate(headers)
        }

    # Skip certain columns like ID, name, address, etc.
    skip_keywords = ['id', 'name', 'address', 'phone', 'url']

    similarity = 0.0
    denominator = 0.0

    for i, header in enumerate(headers):
        lowered = header.lower()
        if any(keyword in lowered for keyword in skip_keywords):
            continue

        weight = weights.get(header, 1.0)
        denominator += weight

        if numeric_columns[i]:
            val1 = _as_float(restaurant1[i])
            val2 = _as_float(restaurant2[i])

            max_val = max(abs(val1), abs(val2))
            if max_val > 0:
                # The relative difference exceeds 1 when the signs differ;
                # clamp so a single column never subtracts from the total.
                score = max(0.0, 1.0 - abs(val1 - val2) / max_val)
                similarity += weight * score
            else:
                similarity += weight  # Both zero, consider perfect match
        elif restaurant1[i] == restaurant2[i]:
            # Text comparison - exact match
            similarity += weight

    # Normalize
    if denominator > 0:
        similarity /= denominator

    return similarity


In [None]:
def get_recommendations(data_dict, user_preferences, top_n=5):
    """Get restaurant recommendations based on user preferences.

    Each preference name is matched (case-insensitive substring) against the
    headers and applied to the first header that contains it. A "virtual
    restaurant" holding the preferred values is then compared with every
    row via ``calculate_similarity``.

    Only the columns a preference was matched to take part in the
    comparison (numeric ones weigh 2.0, text ones 1.0; all others weigh
    0.0). Without this, the blank cells of the virtual restaurant would be
    compared as empty/zero values and favour rows with missing data. If no
    preference matches any header, the default weighting of
    ``calculate_similarity`` is used.

    Args:
        data_dict: Preprocessed dataset with ``"headers"``, ``"data"`` and
            ``"numeric_columns"``.
        user_preferences: Mapping of (partial) column name to desired value.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of at most ``top_n`` dicts mapping header to value, ordered
        from most to least similar; ties keep dataset order.
    """
    headers = data_dict["headers"]
    data = data_dict["data"]
    numeric_columns = data_dict["numeric_columns"]

    # Create a virtual restaurant from user preferences
    virtual_restaurant = [""] * len(headers)
    preferred_columns = set()
    for pref_name, pref_value in user_preferences.items():
        needle = pref_name.lower()
        for i, header in enumerate(headers):
            if needle in header.lower():
                virtual_restaurant[i] = pref_value
                preferred_columns.add(i)
                break

    # Every header gets an explicit weight because calculate_similarity
    # falls back to 1.0 for headers missing from the mapping.
    weights = None
    if preferred_columns:
        weights = {}
        for i, header in enumerate(headers):
            if i in preferred_columns:
                weights[header] = 2.0 if numeric_columns[i] else 1.0
            else:
                weights[header] = 0.0

    # Calculate similarity scores for all restaurants
    similarities = [
        (i, calculate_similarity(virtual_restaurant, restaurant, headers, numeric_columns, weights))
        for i, restaurant in enumerate(data)
    ]

    # Sort by similarity (descending) and get top N
    similarities.sort(key=lambda pair: pair[1], reverse=True)

    # Format recommendations
    return [
        {header: data[idx][i] for i, header in enumerate(headers)}
        for idx, _ in similarities[:top_n]
    ]

In [None]:

def display_restaurant(restaurant, headers):
    """Print one restaurant as a short, framed summary.

    The first header containing 'name', 'cuisine', 'rating' and 'price'
    (case-insensitive) is shown under a fixed label, in that order, when
    such a header exists. Up to five further columns follow, excluding
    those already shown and any whose header contains 'id', 'url', 'phone'
    or 'address'.

    Args:
        restaurant: Mapping of header name to value.
        headers: Column names, in display order.
    """
    def first_header_with(fragment):
        # First header whose lowercase form contains the fragment, else None.
        for candidate in headers:
            if fragment in candidate.lower():
                return candidate
        return None

    featured = [
        ("Name", first_header_with('name')),
        ("Cuisine", first_header_with('cuisine')),
        ("Rating", first_header_with('rating')),
        ("Price", first_header_with('price')),
    ]
    rule = "-" * 50

    print(rule)
    for label, column in featured:
        if column:
            print(f"{label}: {restaurant[column]}")

    # Collect the remaining displayable columns.
    already_shown = [column for _, column in featured]
    hidden_fragments = ['id', 'url', 'phone', 'address']
    extras = []
    for header in headers:
        if header in already_shown:
            continue
        lowered = header.lower()
        if any(fragment in lowered for fragment in hidden_fragments):
            continue
        extras.append(f"{header}: {restaurant[header]}")

    # Only the first five extra fields are printed.
    for line in extras[:5]:
        print(line)
    print(rule)


In [None]:
def main(file_path="/content/Dataset .csv"):
    """Run the recommendation demo end to end and print the results.

    Loads the CSV, lists its columns, preprocesses the rows, derives a set
    of demo preferences from the data, and prints the top recommendations.

    Preferences are chosen as follows:

    * cuisine: the first real (non-placeholder) value found in the first
      100 rows of the first header containing 'cuisine';
    * price: the mean of the non-zero values of the first header containing
      'price', when that column is numeric;
    * rating: a fixed 4.5 for the first header containing 'rating', when
      that column is numeric.

    If none of these apply, the average of the first numeric non-ID column
    is used, and failing that, the first row's value of the first non-ID
    column.

    Args:
        file_path: Path of the CSV dataset to load.
    """
    # Load dataset
    data_dict = load_data(file_path)

    if not data_dict:
        return

    # Print column headers
    print("\nColumns in dataset:")
    for i, header in enumerate(data_dict["headers"]):
        print(f"{i+1}. {header}")

    # Preprocess data
    print("\nPreprocessing data...")
    processed_data = preprocess_data(data_dict)

    headers = processed_data["headers"]
    rows = processed_data["data"]
    numeric_columns = processed_data["numeric_columns"]

    if not rows:
        # Nothing to rank, and the fallbacks below would index rows[0].
        print("Dataset contains no rows; nothing to recommend.")
        return

    # Try to find cuisine, price, and rating columns
    cuisine_col = next((h for h in headers if 'cuisine' in h.lower()), None)
    price_col = next((h for h in headers if 'price' in h.lower()), None)
    rating_col = next((h for h in headers if 'rating' in h.lower()), None)

    # Create user preferences
    user_preferences = {}

    if cuisine_col:
        col_idx = headers.index(cuisine_col)
        # Take the first real value in row order so repeated runs pick the
        # same cuisine; "Unknown" is the preprocessing placeholder.
        sample_cuisine = next(
            (
                row[col_idx]
                for row in rows[:100]
                if col_idx < len(row) and row[col_idx] and row[col_idx] != "Unknown"
            ),
            None,
        )
        if sample_cuisine is not None:
            user_preferences[cuisine_col] = sample_cuisine
            print(f"Setting cuisine preference: {sample_cuisine}")

    if price_col:
        col_idx = headers.index(price_col)
        if numeric_columns[col_idx]:
            # Use the average price; zero values (the missing-value default)
            # are falsy and therefore left out of the average.
            prices = [float(row[col_idx]) for row in rows if col_idx < len(row) and row[col_idx]]
            if prices:
                avg_price = sum(prices) / len(prices)
                user_preferences[price_col] = avg_price
                print(f"Setting price preference: {avg_price}")

    if rating_col:
        col_idx = headers.index(rating_col)
        if numeric_columns[col_idx]:
            # Prefer high ratings
            user_preferences[rating_col] = 4.5
            print("Setting rating preference: 4.5")

    # If no preferences could be set, use the first non-ID numeric column
    if not user_preferences:
        for i, header in enumerate(headers):
            if numeric_columns[i] and 'id' not in header.lower():
                values = [float(row[i]) for row in rows if i < len(row) and row[i]]
                if values:
                    avg_value = sum(values) / len(values)
                    user_preferences[header] = avg_value
                    print(f"Setting fallback preference {header}: {avg_value}")
                    break

    if not user_preferences:
        print("Could not determine any user preferences. Using first non-ID column.")
        for i, header in enumerate(headers):
            if 'id' not in header.lower():
                user_preferences[header] = rows[0][i]
                print(f"Setting fallback preference {header}: {rows[0][i]}")
                break

    # Get recommendations
    print("\nGenerating recommendations based on user preferences:")
    for pref, value in user_preferences.items():
        print(f"- {pref}: {value}")

    recommendations = get_recommendations(processed_data, user_preferences)

    # Display recommendations
    print("\nTop Restaurant Recommendations:")
    for i, restaurant in enumerate(recommendations):
        print(f"\nRecommendation #{i+1}:")
        display_restaurant(restaurant, headers)

# Run the demo only when this code is executed directly (as a script or a
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Successfully loaded 9551 rows with 21 columns

Columns in dataset:
1. ﻿Restaurant ID
2. Restaurant Name
3. Country Code
4. City
5. Address
6. Locality
7. Locality Verbose
8. Longitude
9. Latitude
10. Cuisines
11. Average Cost for two
12. Currency
13. Has Table booking
14. Has Online delivery
15. Is delivering now
16. Switch to order menu
17. Price range
18. Aggregate rating
19. Rating color
20. Rating text
21. Votes

Preprocessing data...
Setting cuisine preference: Italian, Pizza
Setting price preference: 1.804837189823055
Setting rating preference: 4.5

Generating recommendations based on user preferences:
- Cuisines: Italian, Pizza
- Price range: 1.804837189823055
- Aggregate rating: 4.5

Top Restaurant Recommendations:

Recommendation #1:
--------------------------------------------------
Name: Chapter 1 Cafe
Cuisine: Cafe, Italian, Mexican, North Indian, Continental
Rating: 3.9
Price: 1.0
Country Code: 1.0
City: Agra
Locality: Tajganj
Locality Verbose: Tajganj, Agra
Longitude: 0.0


# Restaurant Recommendation System Output Explanation

This output shows the results of running the restaurant recommendation system on a dataset of 9,551 restaurants. Let me explain what's happening:

## Dataset Information
- The dataset contains **9,551 restaurants** with **21 attributes** each
- Key columns include Restaurant ID, Name, Location details, Cuisines, Price range, and Ratings

## User Preferences
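The script takes no user input. `main()` chooses these demo preferences automatically: a sample cuisine value from the first 100 rows, the average of the non-missing price values, and a fixed target rating of 4.5: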
- **Cuisine**: Italian, Pizza
- **Price**: 1.80 (on what appears to be a scale where 1 is inexpensive)
- **Rating**: 4.5 (likely on a 5-point scale)

## How the Recommendations Work
The system uses content-based filtering to find restaurants similar to the preferences. It calculates similarity scores between the "ideal restaurant" (based on preferences) and all restaurants in the dataset.

## Recommendation Analysis

1. **Chapter 1 Cafe** (Agra)
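   - Similarity factors: lists Italian among its cuisines, but text columns are compared by exact string match, so "Cafe, Italian, Mexican, North Indian, Continental" does not score as a match for "Italian, Pizza"; its price (1.0) is somewhat below the preferred 1.80, and its rating (3.9) is fairly close to the preferred 4.5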
   - This restaurant ranks first because it matches on multiple criteria

2. **Deena Chat Bhandar** (Varanasi)
   - Doesn't match the cuisine preference; its price (1.0) is somewhat below the preferred 1.80
   - Rating (3.8) is relatively good

3. **Affamato** (Noida)
   - Perfect cuisine match: Italian, Pizza
   - Price (1.0) is somewhat below the preferred 1.80
   - Rating is missing (0.0), which reduced its overall score

4. **The BBQ Garden** (Faridabad)
   - Different cuisine (North Indian, Seafood)
   - Price (2.0) is close to the preference
   - Missing rating data

5. **Aravali Owls** (Faridabad)
   - Different cuisine (North Indian, Chinese)
   - Price (2.0) is close to the preference
   - Missing rating data

## Interesting Observations
- The system balances multiple factors rather than just matching on cuisine
- Restaurants with missing ratings (shown as 0.0) are still included if they match on other criteria
- Location information is provided but doesn't appear to be a primary matching factor
- The first recommendation includes Italian cuisine but also offers other options (Mexican, North Indian), showing flexibility in the recommendations

This recommendation system effectively finds restaurants that match the user preferences across multiple attributes, prioritizing overall similarity rather than exact matches on any single criterion.

# Restaurant Recommendation System Output Explanation

This output shows the results of running the restaurant recommendation system on a dataset of 9,551 restaurants. Let me explain what's happening:

## Dataset Information
- The dataset contains **9,551 restaurants** with **21 attributes** each
- Key columns include Restaurant ID, Name, Location details, Cuisines, Price range, and Ratings

## User Preferences
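The script takes no user input. `main()` chooses these demo preferences automatically: a sample cuisine value from the first 100 rows, the average of the non-missing price values, and a fixed target rating of 4.5: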
- **Cuisine**: Italian, Pizza
- **Price**: 1.80 (on what appears to be a scale where 1 is inexpensive)
- **Rating**: 4.5 (likely on a 5-point scale)

## How the Recommendations Work
The system uses content-based filtering to find restaurants similar to the preferences. It calculates similarity scores between the "ideal restaurant" (based on preferences) and all restaurants in the dataset.

## Recommendation Analysis

1. **Chapter 1 Cafe** (Agra)
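   - Similarity factors: lists Italian among its cuisines, but text columns are compared by exact string match, so "Cafe, Italian, Mexican, North Indian, Continental" does not score as a match for "Italian, Pizza"; its price (1.0) is somewhat below the preferred 1.80, and its rating (3.9) is fairly close to the preferred 4.5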
   - This restaurant ranks first because it matches on multiple criteria

2. **Deena Chat Bhandar** (Varanasi)
   - Doesn't match the cuisine preference; its price (1.0) is somewhat below the preferred 1.80
   - Rating (3.8) is relatively good

3. **Affamato** (Noida)
   - Perfect cuisine match: Italian, Pizza
   - Price (1.0) is somewhat below the preferred 1.80
   - Rating is missing (0.0), which reduced its overall score

4. **The BBQ Garden** (Faridabad)
   - Different cuisine (North Indian, Seafood)
   - Price (2.0) is close to the preference
   - Missing rating data

5. **Aravali Owls** (Faridabad)
   - Different cuisine (North Indian, Chinese)
   - Price (2.0) is close to the preference
   - Missing rating data

## Interesting Observations
- The system balances multiple factors rather than just matching on cuisine
- Restaurants with missing ratings (shown as 0.0) are still included if they match on other criteria
- Location information is provided but doesn't appear to be a primary matching factor
- The first recommendation includes Italian cuisine but also offers other options (Mexican, North Indian), showing flexibility in the recommendations

This recommendation system effectively finds restaurants that match the user preferences across multiple attributes, prioritizing overall similarity rather than exact matches on any single criterion.
