### Import Libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import jaccard_score
from scipy.spatial.distance import pdist, squareform

### Load Dataset


In [2]:
df = pd.read_csv(r'C:\Users\Ashish Mishra\OneDrive\Desktop\Machine Learning\Dataset.csv')
df.head(5)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


### Data Preprocessing

In [3]:
dfRS = df[['Restaurant ID', 'Restaurant Name', 'Cuisines', 'Aggregate rating', 'Votes']]
dfRS.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Aggregate rating,Votes
0,6317637,Le Petit Souffle,"French, Japanese, Desserts",4.8,314
1,6304287,Izakaya Kikufuji,Japanese,4.5,591
2,6300002,Heat - Edsa Shangri-La,"Seafood, Asian, Filipino, Indian",4.4,270
3,6318506,Ooma,"Japanese, Sushi",4.9,365
4,6314302,Sambo Kojin,"Japanese, Korean",4.8,229


### Exploring and Cleaning the Dataset

In [4]:
def data_description():
    """Function to get details about each column."""
    details = []
    for col in dfRS.columns:
        details.append([
            col,
            dfRS[col].dtype,
            dfRS[col].isna().sum(),
            round(dfRS[col].isna().sum() / len(dfRS) * 100, 2),
            dfRS[col].nunique(),
            list(dfRS[col].drop_duplicates().sample(2).values)
        ])
    return pd.DataFrame(details, columns=['Column', 'Data Type', 'Missing Value', 'Pct Missing', 'Unique Count', 'Sample Values'])

data_description()

Unnamed: 0,Column,Data Type,Missing Value,Pct Missing,Unique Count,Sample Values
0,Restaurant ID,int64,0,0.0,9551,"[18486847, 3182]"
1,Restaurant Name,object,0,0.0,7446,"[Mustake Multicuisine Restaurant, Aggarwal Bik..."
2,Cuisines,object,9,0.09,1825,"[European, North Indian, Continental, Italian,..."
3,Aggregate rating,float64,0,0.0,33,"[3.2, 3.9]"
4,Votes,int64,0,0.0,1012,"[510, 1351]"


In [5]:
dfRS = dfRS.dropna()
dfRS.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Aggregate rating,Votes
0,6317637,Le Petit Souffle,"French, Japanese, Desserts",4.8,314
1,6304287,Izakaya Kikufuji,Japanese,4.5,591
2,6300002,Heat - Edsa Shangri-La,"Seafood, Asian, Filipino, Indian",4.4,270
3,6318506,Ooma,"Japanese, Sushi",4.9,365
4,6314302,Sambo Kojin,"Japanese, Korean",4.8,229


In [6]:
dfRS.rename(columns={
    'Restaurant ID': 'restaurant_id',
    'Restaurant Name': 'restaurant_name',
    'Cuisines': 'cuisines',
    'Aggregate rating': 'aggregate_rating',
    'Votes': 'votes'
}, inplace=True)

In [7]:
dfRS.duplicated().sum()

0

In [8]:
dfRS['restaurant_name'].duplicated().sum()

2105

In [9]:
dfRS = dfRS.sort_values(by=['restaurant_name', 'aggregate_rating'], ascending=False)
dfRS = dfRS.drop_duplicates('restaurant_name', keep='first')
dfRS.head()

Unnamed: 0,restaurant_id,restaurant_name,cuisines,aggregate_rating,votes
9523,6000871,�ukura��a Sofras۱,"Kebab, Izgara",4.4,296
3120,18222559,{Niche} - Cafe & Bar,"North Indian, Chinese, Italian, Continental",4.1,492
9334,7100938,wagamama,"Japanese, Asian",3.7,131
9454,6401789,tashas,"Cafe, Mediterranean",4.1,374
4659,18361747,t Lounge by Dilmah,"Cafe, Tea, Desserts",3.6,34


In [10]:
dfRS = dfRS[dfRS['aggregate_rating'] >= 4.0]
dfRS.head()

Unnamed: 0,restaurant_id,restaurant_name,cuisines,aggregate_rating,votes
9523,6000871,�ukura��a Sofras۱,"Kebab, Izgara",4.4,296
3120,18222559,{Niche} - Cafe & Bar,"North Indian, Chinese, Italian, Continental",4.1,492
9454,6401789,tashas,"Cafe, Mediterranean",4.1,374
9385,6113857,sketch Gallery,"British, Contemporary",4.5,148
1837,18418247,feel ALIVE,"North Indian, American, Asian, Biryani",4.7,69


In [11]:
xTabRestoCuisines = pd.crosstab(dfRS['restaurant_name'], dfRS['cuisines'])
xTabRestoCuisines.head()

cuisines,African,"African, Portuguese",American,"American, Asian, Burger","American, Asian, European, Seafood","American, Asian, Italian, Seafood","American, BBQ","American, BBQ, Burger","American, BBQ, Sandwich","American, BBQ, Southern",...,Turkish,Turkish Pizza,"Turkish, Arabian, Middle Eastern","Turkish, Mediterranean, Middle Eastern",Vietnamese,"Vietnamese, Fish and Chips","Western, Asian, Cafe",World Cuisine,"World Cuisine, Mexican, Italian","World Cuisine, Patisserie, Cafe"
restaurant_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Ohana,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10 Downing Street,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11th Avenue Cafe Bistro,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
145 Kala Ghoda,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19 Flavours Biryani,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
jaccardDist = pdist(xTabRestoCuisines.values, metric='jaccard')

In [13]:
jaccardMatrix = squareform(jaccardDist)
jaccardSim = 1 - jaccardMatrix

In [14]:
dfJaccard = pd.DataFrame(
    jaccardSim,
    index=xTabRestoCuisines.index,
    columns=xTabRestoCuisines.index
)

dfJaccard.head()

restaurant_name,'Ohana,10 Downing Street,11th Avenue Cafe Bistro,145 Kala Ghoda,19 Flavours Biryani,1918 Bistro & Grill,2 Dog,22nd Parallel,3 Wise Monkeys,38 Barracks,...,Zoeys Pizzeria,Zolocrust - Hotel Clarks Amer,Zombie Burger + Drink Lab,Zuka Choco-la,Zunzi's,feel ALIVE,sketch Gallery,tashas,{Niche} - Cafe & Bar,�ukura��a Sofras۱
restaurant_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Ohana,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10 Downing Street,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11th Avenue Cafe Bistro,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
145 Kala Ghoda,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19 Flavours Biryani,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
input_restaurant = 'Ooma'

In [16]:
sim = dfJaccard.loc[input_restaurant].sort_values(ascending=False)
sim = pd.DataFrame({'restaurant_name': sim.index, 'simScore': sim.values})
sim = sim[(sim['restaurant_name'] != input_restaurant) & (sim['simScore'] >= 0.7)].head(5)
recommendations = pd.merge(
    sim,
    dfRS[['restaurant_name', 'aggregate_rating']],
    how='inner',
    on='restaurant_name'
)

In [17]:
recommendations = recommendations.sort_values('aggregate_rating', ascending=False).drop_duplicates('restaurant_name', keep='first')
recommendations

Unnamed: 0,restaurant_name,simScore,aggregate_rating
2,Nobu,1.0,4.4
3,Nagai,1.0,4.3
4,Ichiban,1.0,4.3
1,Osaka,1.0,4.2
0,Guppy,1.0,4.1


### Conclusion:
The above data will show up to the top 5 recommended restaurants with the best ratings. The ratings are curated to include only restaurants with an aggregate rating of 4 and above. This ensures that the recommendation system provides high-quality suggestions objectively.
```