**Create a restaurent recommendations system based on user preference**

In [3]:
# Import Libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import jaccard_score
from scipy.spatial.distance import pdist, squareform
pd.reset_option('display.max_rows')

In [4]:

import warnings
warnings.filterwarnings("ignore")

In [6]:

# import csv file and vreate dataframe
df = pd.read_csv("restaurant.csv")
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [7]:
# estracting the columns of the dataframe
df.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

In [8]:
df1 = df[['Restaurant ID','Restaurant Name','Cuisines','Aggregate rating','Votes']]
df1

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Aggregate rating,Votes
0,6317637,Le Petit Souffle,"French, Japanese, Desserts",4.8,314
1,6304287,Izakaya Kikufuji,Japanese,4.5,591
2,6300002,Heat - Edsa Shangri-La,"Seafood, Asian, Filipino, Indian",4.4,270
3,6318506,Ooma,"Japanese, Sushi",4.9,365
4,6314302,Sambo Kojin,"Japanese, Korean",4.8,229
...,...,...,...,...,...
9546,5915730,Naml۱ Gurme,Turkish,4.1,788
9547,5908749,Ceviz A��ac۱,"World Cuisine, Patisserie, Cafe",4.2,1034
9548,5915807,Huqqa,"Italian, World Cuisine",3.7,661
9549,5916112,A���k Kahve,Restaurant Cafe,4.0,901


**cleaning the data**

In [9]:
def dataDesc():
    listItem = []
    for col in df1.columns :
        listItem.append(
            [col,
            df1[col].dtype,
            df1[col].isna().sum(),
            round(df1[col].isna().sum()/len(df1)*100,2),
            df1[col].nunique(),
            list(df1[col].drop_duplicates().sample(2).values)]
        )
    descData = pd.DataFrame(data = listItem,
                            columns = ['Column','Data Type', 'Missing Value',
                                        'Pct Missing Value', 'Num Unique', 'Unique Sample'])
    return descData

dataDesc()

Unnamed: 0,Column,Data Type,Missing Value,Pct Missing Value,Num Unique,Unique Sample
0,Restaurant ID,int64,0,0.0,9551,"[18332051, 15717]"
1,Restaurant Name,object,0,0.0,7446,"[Baltazar, Radha Swami Chaat Bhandar]"
2,Cuisines,object,9,0.09,1825,"[North Indian, Hyderabadi, Kashmiri, Chinese, ..."
3,Aggregate rating,float64,0,0.0,33,"[3.8, 2.1]"
4,Votes,int64,0,0.0,1012,"[344, 1004]"


In [12]:
df1=df1.dropna()
df1

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Aggregate rating,Votes
0,6317637,Le Petit Souffle,"French, Japanese, Desserts",4.8,314
1,6304287,Izakaya Kikufuji,Japanese,4.5,591
2,6300002,Heat - Edsa Shangri-La,"Seafood, Asian, Filipino, Indian",4.4,270
3,6318506,Ooma,"Japanese, Sushi",4.9,365
4,6314302,Sambo Kojin,"Japanese, Korean",4.8,229
...,...,...,...,...,...
9546,5915730,Naml۱ Gurme,Turkish,4.1,788
9547,5908749,Ceviz A��ac۱,"World Cuisine, Patisserie, Cafe",4.2,1034
9548,5915807,Huqqa,"Italian, World Cuisine",3.7,661
9549,5916112,A���k Kahve,Restaurant Cafe,4.0,901


In [14]:
# Renaming the Columns
df1 = df1.rename(columns={'Restaurant ID': 'restaurant_id'})
df1 = df1.rename(columns={'Restaurant Name': 'restaurant_name'})
df1 = df1.rename(columns={'Cuisines': 'cuisines'})
df1 = df1.rename(columns={'Aggregate rating': 'aggregate_rating'})
df1 = df1.rename(columns={'Votes': 'votes'})
df1

Unnamed: 0,restaurant_id,restaurant_name,cuisines,aggregate_rating,votes
0,6317637,Le Petit Souffle,"French, Japanese, Desserts",4.8,314
1,6304287,Izakaya Kikufuji,Japanese,4.5,591
2,6300002,Heat - Edsa Shangri-La,"Seafood, Asian, Filipino, Indian",4.4,270
3,6318506,Ooma,"Japanese, Sushi",4.9,365
4,6314302,Sambo Kojin,"Japanese, Korean",4.8,229
...,...,...,...,...,...
9546,5915730,Naml۱ Gurme,Turkish,4.1,788
9547,5908749,Ceviz A��ac۱,"World Cuisine, Patisserie, Cafe",4.2,1034
9548,5915807,Huqqa,"Italian, World Cuisine",3.7,661
9549,5916112,A���k Kahve,Restaurant Cafe,4.0,901


In [15]:
#removing duplicates from the data frame
df1.duplicated().sum()


0

In [17]:
df1['restaurant_name'].duplicated().sum()

2105

In [18]:
df1['restaurant_name'].value_counts()

restaurant_name
Cafe Coffee Day             83
Domino's Pizza              79
Subway                      63
Green Chick Chop            51
McDonald's                  48
                            ..
The Town House Cafe          1
The G.T. Road                1
The Darzi Bar & Kitchen      1
Smoke On Water               1
Walter's Coffee Roastery     1
Name: count, Length: 7437, dtype: int64

In [19]:
dfRS = df1.sort_values(by=['restaurant_name','aggregate_rating'],ascending=False)

In [22]:
df1[df1['restaurant_name']=="Starbucks"].head()

Unnamed: 0,restaurant_id,restaurant_name,cuisines,aggregate_rating,votes
1241,306001,Starbucks,Cafe,3.9,560
1510,307786,Starbucks,Cafe,3.8,183
1561,311777,Starbucks,Cafe,3.5,66
1640,18272387,Starbucks,Cafe,3.5,16
1854,305272,Starbucks,Cafe,3.7,269


In [25]:
#Dropping duplicates 
df1=df1.drop_duplicates('restaurant_name',keep='first')
df1

Unnamed: 0,restaurant_id,restaurant_name,cuisines,aggregate_rating,votes
0,6317637,Le Petit Souffle,"French, Japanese, Desserts",4.8,314
1,6304287,Izakaya Kikufuji,Japanese,4.5,591
2,6300002,Heat - Edsa Shangri-La,"Seafood, Asian, Filipino, Indian",4.4,270
3,6318506,Ooma,"Japanese, Sushi",4.9,365
4,6314302,Sambo Kojin,"Japanese, Korean",4.8,229
...,...,...,...,...,...
9546,5915730,Naml۱ Gurme,Turkish,4.1,788
9547,5908749,Ceviz A��ac۱,"World Cuisine, Patisserie, Cafe",4.2,1034
9548,5915807,Huqqa,"Italian, World Cuisine",3.7,661
9549,5916112,A���k Kahve,Restaurant Cafe,4.0,901


In [26]:
df1['restaurant_name'].value_counts()

restaurant_name
Le Petit Souffle                1
Hook N Cook                     1
Nand Bhai Chholey Bhature       1
N.S. Pizza Point                1
Munna Bakery                    1
                               ..
Gole Hatti                      1
Bishan Swaroop Chaat Bhandar    1
Annapurna Bhandar               1
Amritsari Lassi Wala            1
Walter's Coffee Roastery        1
Name: count, Length: 7437, dtype: int64

In [27]:
df1=df1[df1['aggregate_rating']>=4.0]
df1

Unnamed: 0,restaurant_id,restaurant_name,cuisines,aggregate_rating,votes
0,6317637,Le Petit Souffle,"French, Japanese, Desserts",4.8,314
1,6304287,Izakaya Kikufuji,Japanese,4.5,591
2,6300002,Heat - Edsa Shangri-La,"Seafood, Asian, Filipino, Indian",4.4,270
3,6318506,Ooma,"Japanese, Sushi",4.9,365
4,6314302,Sambo Kojin,"Japanese, Korean",4.8,229
...,...,...,...,...,...
9545,5915054,Baltazar,"Burger, Izgara",4.3,870
9546,5915730,Naml۱ Gurme,Turkish,4.1,788
9547,5908749,Ceviz A��ac۱,"World Cuisine, Patisserie, Cafe",4.2,1034
9549,5916112,A���k Kahve,Restaurant Cafe,4.0,901


In [28]:
df1['cuisines']=df1['cuisines'].str.split(',')
df1

Unnamed: 0,restaurant_id,restaurant_name,cuisines,aggregate_rating,votes
0,6317637,Le Petit Souffle,"[French, Japanese, Desserts]",4.8,314
1,6304287,Izakaya Kikufuji,[Japanese],4.5,591
2,6300002,Heat - Edsa Shangri-La,"[Seafood, Asian, Filipino, Indian]",4.4,270
3,6318506,Ooma,"[Japanese, Sushi]",4.9,365
4,6314302,Sambo Kojin,"[Japanese, Korean]",4.8,229
...,...,...,...,...,...
9545,5915054,Baltazar,"[Burger, Izgara]",4.3,870
9546,5915730,Naml۱ Gurme,[Turkish],4.1,788
9547,5908749,Ceviz A��ac۱,"[World Cuisine, Patisserie, Cafe]",4.2,1034
9549,5916112,A���k Kahve,[Restaurant Cafe],4.0,901


In [29]:
df1=df1.explode('cuisines')
df1


Unnamed: 0,restaurant_id,restaurant_name,cuisines,aggregate_rating,votes
0,6317637,Le Petit Souffle,French,4.8,314
0,6317637,Le Petit Souffle,Japanese,4.8,314
0,6317637,Le Petit Souffle,Desserts,4.8,314
1,6304287,Izakaya Kikufuji,Japanese,4.5,591
2,6300002,Heat - Edsa Shangri-La,Seafood,4.4,270
...,...,...,...,...,...
9547,5908749,Ceviz A��ac۱,World Cuisine,4.2,1034
9547,5908749,Ceviz A��ac۱,Patisserie,4.2,1034
9547,5908749,Ceviz A��ac۱,Cafe,4.2,1034
9549,5916112,A���k Kahve,Restaurant Cafe,4.0,901


In [30]:
df1['cuisines'].value_counts()



cuisines
 Italian        157
North Indian    152
 Chinese        141
Cafe            135
 Continental    121
               ... 
 Awadhi           1
 Bengali          1
Bihari            1
 African          1
 B�_rek           1
Name: count, Length: 211, dtype: int64

In [31]:
# Cross Tabulate Restaurant Name and Cuisines
xCuisines = pd.crosstab(df1['restaurant_name'],
                                df1['cuisines'])

In [33]:
xCuisines

cuisines,Afghani,African,American,Andhra,Arabian,Argentine,Asian,Australian,Awadhi,BBQ,...,Sushi,Taiwanese,Tapas,Tea,Thai,Turkish,Turkish Pizza,Vietnamese,Western,World Cuisine
restaurant_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Ohana,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10 Downing Street,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11th Avenue Cafe Bistro,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
145 Kala Ghoda,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19 Flavours Biryani,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
feel ALIVE,0,0,1,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
sketch Gallery,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tashas,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
{Niche} - Cafe & Bar,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
xCuisines.loc['feel ALIVE'].values

array([0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [36]:
df1['restaurant_name'].sample(20,random_state=101)

2292                11th Avenue Cafe Bistro
132                         Mellow Mushroom
3098                                Caffe 9
587                         Barbeque Nation
9164                        The Sassy Spoon
7046                   Young Wild Free Cafe
6143                           Chor Bizarre
433                          Eggs 'n Things
748                   Sagar Gaire Fast Food
6618                      Dukes Pastry Shop
375              New Yorker Deli & Pizzeria
2                    Heat - Edsa Shangri-La
2350          Zolocrust - Hotel Clarks Amer
7046                   Young Wild Free Cafe
943                           Cafe Parmesan
7041    QRO Gourmeteriia BY DARK HOUSE KAFE
8076                         The Bento Cafe
571        Cho Gao - Crowne Plaza Abu Dhabi
9522              Me��hur Tavac۱ Recep Usta
161                         The Egg Factory
Name: restaurant_name, dtype: object

In [53]:
#similarity
sim=jaccard_score(xCuisines.loc["Caffe 9"].values,xCuisines.loc["11th Avenue Cafe Bistro"].values)

In [54]:
print(sim)

0.2857142857142857


In [55]:
# Create Similarity Value DF
jaccardDist = pdist(xCuisines.values, metric='jaccard')
jaccardMatrix = squareform(jaccardDist)
jaccardSim = 1 - jaccardMatrix
dfJaccard = pd.DataFrame(
    jaccardSim,
    index=xCuisines.index,
    columns=xCuisines.index)

dfJaccard

restaurant_name,'Ohana,10 Downing Street,11th Avenue Cafe Bistro,145 Kala Ghoda,19 Flavours Biryani,1918 Bistro & Grill,2 Dog,22nd Parallel,3 Wise Monkeys,38 Barracks,...,Zoeys Pizzeria,Zolocrust - Hotel Clarks Amer,Zombie Burger + Drink Lab,Zuka Choco-la,Zunzi's,feel ALIVE,sketch Gallery,tashas,{Niche} - Cafe & Bar,�ukura��a Sofras۱
restaurant_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'Ohana,1.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.00,0.000000,0.0,0.0,0.000000,0.0
10 Downing Street,0.0,1.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.200000,...,0.0,0.000000,0.0,0.0,0.00,0.200000,0.0,0.0,0.500000,0.0
11th Avenue Cafe Bistro,0.0,0.0,1.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,...,0.0,0.166667,0.0,0.0,0.00,0.142857,0.0,0.2,0.333333,0.0
145 Kala Ghoda,0.0,0.0,0.000000,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.2,0.0,0.00,0.000000,0.0,0.0,0.000000,0.0
19 Flavours Biryani,0.0,0.0,0.000000,0.0,1.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.00,0.000000,0.0,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
feel ALIVE,0.0,0.2,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.600000,...,0.0,0.000000,0.0,0.0,0.00,1.000000,0.0,0.0,0.142857,0.0
sketch Gallery,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.00,0.000000,1.0,0.0,0.000000,0.0
tashas,0.0,0.0,0.200000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.25,0.000000,0.0,1.0,0.000000,0.0
{Niche} - Cafe & Bar,0.0,0.5,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,...,0.0,0.166667,0.0,0.0,0.00,0.142857,0.0,0.0,1.000000,0.0


In [56]:
df1['restaurant_name'].sample(20)

42              Pesqueiro Eco Gourmet
9169                 Agent Jack's Bar
1859               BRONX Bar Exchange
449              Lulu's Chocolate Bar
2310                        Mocha Bar
2284                        Confucius
2443                       Bistro 226
773        Mamma Mia - Mayfair Lagoon
8354    DIOS The Neighbourhood Bistro
9365                    The Boozy Cow
799                      Super Donuts
168       Granite City Food & Brewery
647                            @Mango
7036                     Cafe Kazbaah
1251               Dhaba By Claridges
180                 Atlas World Grill
790                  Kylin Experience
839                   Sree Annapoorna
1419                 Cafe Soul Garden
6522            Karnataka Food Centre
Name: restaurant_name, dtype: object

*recommendation for new one*

In [69]:
res = 'BlackStone'

sim1 = dfJaccard.loc[res].sort_values(ascending=False)

sim1 = pd.DataFrame({'restaurant_name': sim1.index, 'sim1Score': sim1.values})
sim1 = sim1[(sim1['restaurant_name']!= res) & (sim1['sim1Score']>=0.7)].head(5)

# Merge The Rating
Resc = pd.merge(sim1,df1[['restaurant_name','aggregate_rating']],how='inner',on='restaurant_name')
Final = Resc.sort_values('aggregate_rating',ascending=False).drop_duplicates('restaurant_name',keep='first')

In [70]:
Final

Unnamed: 0,restaurant_name,sim1Score,aggregate_rating
0,Henry's,1.0,4.1
2,B. Matthew's Eatery,1.0,4.1


***After training with the starbucks we test with the restaurant with*** ***the BlackStone where it suggests the two restaurant with the help of the best rating as effective***