## Restaurant Recommendation

In [2]:
# importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import re
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
# loading data
df = pd.read_csv('../Dataset.csv')
country_code = pd.read_excel('../Country Code.xlsx')

# merging data
df = pd.merge(df, country_code, on='Country Code',how='left')

In [4]:
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,Average Cost for two,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes,Country
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",1100,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314,Phillipines
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,1200,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591,Phillipines
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",4000,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270,Phillipines
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",1500,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365,Phillipines
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",1500,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229,Phillipines


In [5]:
# removing the missing values in cuisines column
df = df[~(df['Cuisines'].isnull())]

In [6]:
# renaming the features
df.rename(columns={
    'Restaurant Name':'Name',
    'Has Table booking':'Booking',	
    'Has Online delivery':'Online_delivery',
    'Is delivering now':'delivery_now_option',
    'Aggregate rating':'Rating'	
}, inplace=True)

# droping unnecessary features
df.drop(columns=['Restaurant ID', 'Country Code','Locality','Locality Verbose',
                 'Latitude', 'Longitude', 
                 'Switch to order menu'], axis=1, inplace=True)

In [7]:
# cost for 1 person
df['Cost'] = df['Average Cost for two']/2

In [14]:
# exporting this preprocessed data
df.to_csv('final_data.csv', index=False)

In [10]:
# Fit TF-IDF on entire dataset (before filtering)
tfidf_vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
tfidf_matrix = tfidf_vectorizer.fit_transform(df['Cuisines'].fillna(""))

In [11]:
# Mapping currency symbols for cost
currency_symbols = {
   'Indian Rupees(Rs.)' : '₹',      
    'Dollar($)':'$' ,                 
    'Pounds(£)':'£',                  
    'Brazilian Real(R$)': 'R$',       
    'Rand(R)': 'R',            
    'Emirati Diram(AED)': 'د.إ',          
    'NewZealand($)': '$',           
    'Turkish Lira(TL)': 'TL',            
    'Botswana Pula(P)': 'P',        
    'Indonesian Rupiah(IDR)' :'Rp',     
    'Qatari Rial(QR)': 'QR',             
    'Sri Lankan Rupee(LKR)':'Rs' 
}

# Function to Format Cost with Currency Symbol
def format_cost(row):
    currency_symbol = currency_symbols.get(row['Currency'], row['Currency'])  # Get symbol or default to currency name
    return f"{currency_symbol}{row['Cost']}"

In [12]:
# Function to Recommend Restaurants
def restaurant_recommender(country, city, preferred_cuisines, price_range, top_n=5):
    # Filter by Country and City
    filtered_df = df[(df['Country'] == country) & (df['City'] == city)].copy()
    
    if filtered_df.empty:
        return "No restaurants found for the selected location."
    
    # Normalize Price Range
    scaler = StandardScaler()
    price_normalized = scaler.fit_transform(filtered_df[['Price range']])
    
    # Compute Similarity for Price Range
    price_sim = cosine_similarity(price_normalized)

    # Convert Selected Cuisines to TF-IDF Format
    preferred_cuisines = ' '.join(preferred_cuisines)  # Convert list to string
    input_vector = tfidf_vectorizer.transform([preferred_cuisines])

    # Compute Cosine Similarity for Cuisines
    input_cuisine_sim = cosine_similarity(input_vector, tfidf_matrix[df.index.isin(filtered_df.index)]).flatten()

    # Store Similarity Scores in Filtered DataFrame
    filtered_df['Cuisine Similarity'] = input_cuisine_sim
    filtered_df['Price Similarity'] = price_sim.diagonal()

    # Normalize Ratings (Higher Rating = More Recommended)
    filtered_df['Normalized Rating'] = filtered_df['Rating'] / filtered_df['Rating'].max()

    # Compute Final Score (Weighted)
    filtered_df['Final Score'] = (filtered_df['Cuisine Similarity'] * 0.5) + (filtered_df['Price Similarity'] * 0.2) + (filtered_df['Normalized Rating'] * 0.3)

    # Get Top N Recommendations
    recommendations = filtered_df.sort_values(by='Final Score', ascending=False).head(top_n)
    
    # Format Cost Column
    recommendations['Cost'] = recommendations.apply(format_cost, axis=1)

    return recommendations[['Name', 'Address', 'Cost', 'Rating']]

In [13]:
# getting recommendations
restaurant_recommender('India','New Delhi', ['Indian', 'Italian'], 3)

Unnamed: 0,Name,Address,Cost,Rating
7456,Cafe Knosh - The Leela Ambience Convention Hotel,"The Leela Ambience Convention Hotel, Maharaja ...",₹1350.0,3.9
5122,Food For Thought,"14, Navjivan Vihar, Malviya Nagar, New Delhi",₹350.0,3.8
3726,Chhalava - Lava,"M-40, Greater Kailash (GK) 2, New Delhi",₹600.0,4.4
6661,The Drunk House,"A-15, 2nd Floor, Vishal Enclave, Najafgarh Roa...",₹650.0,4.2
2620,Chaskaa,"Kamal Complex, Local Shopping Centre, Surajmal...",₹325.0,3.5
