# **Task** 2: Restaurant Recommendation

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings

warnings.filterwarnings('ignore')

In [None]:
# Load the dataset
# NOTE(review): absolute path that looks like a Colab Google Drive mount — confirm it
# resolves in the environment where this notebook is run.
file_path = '/content/drive/MyDrive/ML Projects/Cognifyz Technologies/MACHINE LEARNING TASK LIST AND DATASET/Restaurant_Reviews.csv'
df = pd.read_csv(file_path)
# Work on an independent copy: later cells modify `data` (fillna, label encoding,
# new columns) and a plain alias would silently apply those changes to the raw `df` too.
data = df.copy()

In [None]:
# Display dataset info: index range, column names, non-null counts and dtypes
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9551 entries, 0 to 9550
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant ID         9551 non-null   int64  
 1   Restaurant Name       9551 non-null   object 
 2   Country Code          9551 non-null   int64  
 3   City                  9551 non-null   object 
 4   Address               9551 non-null   object 
 5   Locality              9551 non-null   object 
 6   Locality Verbose      9551 non-null   object 
 7   Longitude             9551 non-null   float64
 8   Latitude              9551 non-null   float64
 9   Cuisines              9542 non-null   object 
 10  Average Cost for two  9551 non-null   int64  
 11  Currency              9551 non-null   object 
 12  Has Table booking     9551 non-null   object 
 13  Has Online delivery   9551 non-null   object 
 14  Is delivering now     9551 non-null   object 
 15  Switch to order menu 

In [None]:
# Display data types of all columns
# print("Data Types of Each Column:")
# print(data.dtypes)

## Step 1: Preprocess the dataset

In [None]:
# print("Missing Values:\n", df.isnull().sum())
# Derive `data` from the raw frame `df` without mutating it, so re-running this cell
# always starts from the same input and produces the same result.
# Numeric gaps are filled with the column median; any remaining gaps get the
# placeholder string 'Unknown'.
data = df.fillna(df.median(numeric_only=True)).fillna('Unknown')

# Encoding categorical variables
# One fitted encoder is stored per column so the integer codes can be mapped back to
# the original labels later via label_encoders[col].inverse_transform(...).
label_encoders = {}
categorical_columns = ["Currency", "Has Table booking", "Has Online delivery", "Is delivering now", "Switch to order menu", "Rating color", "Rating text"]
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Combining important features for content-based filtering
# Features: Cuisines, City, and Price Range (space-separated into one text field)
data["combined_features"] = data["Cuisines"] + " " + data["City"] + " " + data["Price range"].astype(str)

## Step 2: Determine criteria for recommendations

In [None]:
# Vectorizing the combined features
# CountVectorizer's default token pattern only keeps tokens of two or more word
# characters, which drops one-character tokens such as a one-digit "Price range"
# value (presumably 1-4 — TODO confirm). Accept single-character tokens so that the
# price component of combined_features actually reaches the feature matrix.
cv = CountVectorizer(token_pattern=r"(?u)\b\w+\b")
feature_matrix = cv.fit_transform(data["combined_features"])

# Compute similarity matrix
# NOTE(review): this is a dense (n_rows x n_rows) array and no active cell in the
# visible notebook reads it — consider dropping it if memory becomes a problem.
similarity_matrix = cosine_similarity(feature_matrix)

## Step 3: Content-based filtering

In [None]:
def parse_user_input(user_input, known_cities=None):
    """Split a free-text preference string into (cuisine, city, price).

    The input is whitespace-tokenised and each token is classified:

    * a token made only of decimal digits becomes the price (last one wins);
    * a run of one or more consecutive tokens that equals a known city name,
      compared case-insensitively, becomes the city (longest match first, so
      multi-word city names are recognised; last match wins);
    * every other token is treated as part of the cuisine.

    Parameters
    ----------
    user_input : str
        Raw text typed by the user, e.g. ``"Italian 500"``.
    known_cities : iterable of str, optional
        City names to match against. Defaults to the unique values of
        ``data["City"]``.

    Returns
    -------
    tuple
        ``(cuisine, city, price)`` where ``cuisine`` is the remaining tokens
        joined by a space and capitalised (or ``None``), ``city`` is the
        matched name spelled as in ``known_cities`` (or ``None``) and
        ``price`` is an ``int`` (or ``None``).
    """
    if known_cities is None:
        known_cities = data["City"].unique()

    # Build the lookup once (lower-cased key -> original spelling) instead of
    # rescanning the city list for every token.
    city_lookup = {str(name).strip().lower(): str(name) for name in known_cities}
    max_city_words = max((len(key.split()) for key in city_lookup), default=1)

    tokens = user_input.split()
    cuisine_words = []
    city = None
    price = None

    pos = 0
    while pos < len(tokens):
        token = tokens[pos]

        # isdecimal() (unlike isdigit()) rejects characters such as superscript
        # digits that int() cannot convert.
        if token.isdecimal():
            price = int(token)
            pos += 1
            continue

        # Try the longest possible city name starting at this token first.
        consumed = 0
        longest = min(max_city_words, len(tokens) - pos)
        for width in range(longest, 0, -1):
            candidate = " ".join(tokens[pos:pos + width]).lower()
            if candidate in city_lookup:
                city = city_lookup[candidate]
                consumed = width
                break

        if consumed:
            pos += consumed
        else:
            # Anything that is neither a price nor a city is part of the cuisine
            cuisine_words.append(token)
            pos += 1

    cuisine = " ".join(cuisine_words).capitalize() if cuisine_words else None
    return cuisine, city, price

In [None]:
def recommend_restaurants(user_input, top_n=5, price_tolerance=0.1):
    """Return the best-rated restaurants matching the user's preferences.

    The preference string is parsed with ``parse_user_input`` into an optional
    cuisine, city and price; each criterion that is present narrows down the
    rows of ``data``. Matches are ranked by "Aggregate rating", highest first.

    Parameters
    ----------
    user_input : str
        Free-text preferences (cuisine, city and/or average cost for two).
    top_n : int, default 5
        Maximum number of rows to return.
    price_tolerance : float, default 0.1
        Relative tolerance applied to the price: rows are kept when
        "Average Cost for two" lies within ``price * (1 ± price_tolerance)``,
        bounds included.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows with the columns "Restaurant Name", "Cuisines",
        "City", "Average Cost for two" and "Aggregate rating". Empty when
        nothing matches.
    """
    cuisine, city, price = parse_user_input(user_input)

    # Boolean indexing already returns new frames, so no up-front copy is needed
    matches = data
    if cuisine:
        # regex=False: the text comes from the user and must be matched
        # literally (characters such as "(" or "." are not regex syntax here).
        matches = matches[matches["Cuisines"].str.contains(cuisine, case=False, na=False, regex=False)]
    if city:
        matches = matches[matches["City"].str.contains(city, case=False, na=False, regex=False)]
    if price is not None:  # explicit None check so a price of 0 is still applied
        lower_bound = price * (1 - price_tolerance)
        upper_bound = price * (1 + price_tolerance)
        matches = matches[matches["Average Cost for two"].between(lower_bound, upper_bound)]

    # Sort by aggregate rating, best first
    ranked = matches.sort_values(by="Aggregate rating", ascending=False)

    # Return top-n recommendations
    return ranked[["Restaurant Name", "Cuisines", "City", "Average Cost for two", "Aggregate rating"]].head(top_n)


In [None]:
# Earlier similarity-based draft of recommend_restaurants, kept commented out for reference (the active definition is the filter-based function above)
# def recommend_restaurants(user_input, top_n=5):
#     if user_input.isdigit():  # If input is a price range
#         filtered_data = data[data["Price range"] == int(user_input)]
#     else:
#         # Find the closest match to user input in "combined_features"
#         user_vector = cv.transform([user_input])
#         user_similarity = cosine_similarity(user_vector, feature_matrix)

#         # Get indices of top-n similar restaurants
#         similar_indices = user_similarity.argsort()[0][::-1][:top_n]

#         # Return the recommended restaurants
#         filtered_data = data.iloc[similar_indices]
#     return filtered_data[["Restaurant Name", "Cuisines", "City", "Aggregate rating", "Average Cost for two"]]

## Step 4: Test the recommendation system

In [None]:
# Provide sample user preferences
print("Input Format : Cuisine <space> City <space> Average Cost for two")
print("OR Give any one")
# The number is compared against "Average Cost for two" (±10% by default), so the
# example shows a realistic cost rather than a 1-4 price-range value.
user_preferences = input("Enter your preferences (e.g., 'Italian Delhi 1000'): ")  # User provides preferences
recommendations = recommend_restaurants(user_preferences)

# Display recommendations
print("\nTop Restaurant Recommendations:\n")
if recommendations.empty:
    # Tell the user what happened instead of dumping an empty frame
    print("No restaurants matched your preferences. Try fewer or different criteria.")
else:
    print(recommendations)

Enter your preferences (e.g., 'Italian Delhi 2'): uu

Top Restaurant Recommendations:

Empty DataFrame
Columns: [Restaurant Name, Cuisines, City, Average Cost for two, Aggregate rating]
Index: []
