In [1]:
#define objective
#collect the data from zomato 
#import the libraries 
#load the data 
#data analysis
#data preprocessing
#build your model

1. **Loading the dataset:** Load the data and import the libraries. 
2. **Data Cleaning:** 
 - Deleting redundant rows.
 - Renaming the columns.
 - Dropping duplicates.
 - Cleaning individual columns.
 - Remove the NaN values from the dataset
 - #Some Transformations
3. **Text Preprocessing**
 - Cleaning unnecessary words in the reviews
 - Removing links and other unncessary items
 - Removing Symbols
4. **Recommendation System**

### Importing Libraries

In [2]:
#Importing Libraries
import numpy as np 
import pandas as pd

import re
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

### Loading the dataset

In [3]:
#reading the dataset
zomato_real=pd.read_csv("zomato_min_data.csv")
zomato_real.head() # prints the first N rows of a DataFrame

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,https://www.zomato.com/bangalore/grand-village...,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,+91 8026612447\r\n+91 9901210005,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [4]:
zomato_real.info()  #stats and datatypes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 17 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   url                          10000 non-null  object
 1   address                      10000 non-null  object
 2   name                         10000 non-null  object
 3   online_order                 10000 non-null  object
 4   book_table                   10000 non-null  object
 5   rate                         8628 non-null   object
 6   votes                        10000 non-null  int64 
 7   phone                        9820 non-null   object
 8   location                     9998 non-null   object
 9   rest_type                    9949 non-null   object
 10  dish_liked                   4450 non-null   object
 11  cuisines                     9990 non-null   object
 12  approx_cost(for two people)  9980 non-null   object
 13  reviews_list                 100

### Data Cleaning 

In [5]:
#Deleting Unnnecessary Columns
zomato=zomato_real.drop(['url','dish_liked','phone'],axis=1) #Dropping the column "dish_liked", "phone", "url" and saving the new dataset as "zomato"

In [6]:
zomato

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,Banashankari,Quick Bites,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,Basavanagudi,Casual Dining,"North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,"199/C, 1st Stage, Mahaganapathi Nagar, Basaves...",Sree Banashankari Donne Biriyani,Yes,No,NEW,0,Rajajinagar,Quick Bites,"North Indian, Kebab",250,[],"['Biryani Rice', 'Mutton Biryani', 'Chicken Le...",Delivery,BTM
9996,"90/3, Shiva Krupa, Opposite Syndicate Bank, Bi...",Northern Bites,No,No,NEW,0,Bannerghatta Road,Quick Bites,"Chinese, North Indian, Biryani",350,[],[],Delivery,BTM
9997,"4, 1st Cross, Christ School Road, Bhavani Layo...",Cafe Arabica,No,No,3.8/5,69,Bannerghatta Road,"Cafe, Bakery","Cafe, Bakery",700,"[('Rated 3.0', 'RATED\n Went to have dessert....",[],Delivery,BTM
9998,"Venkatapura Main Road, 12th Cross Road, Near G...",Swa-Desh A Family Restaurant,No,No,,0,Koramangala 1st Block,Quick Bites,North Indian,300,"[('Rated 5.0', 'RATED\n This restaurant was v...",[],Delivery,BTM


In [7]:
#Removing the Duplicates
zomato.duplicated().sum()
zomato.drop_duplicates(inplace=True)

In [8]:
zomato

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,Banashankari,Quick Bites,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,Basavanagudi,Casual Dining,"North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,"199/C, 1st Stage, Mahaganapathi Nagar, Basaves...",Sree Banashankari Donne Biriyani,Yes,No,NEW,0,Rajajinagar,Quick Bites,"North Indian, Kebab",250,[],"['Biryani Rice', 'Mutton Biryani', 'Chicken Le...",Delivery,BTM
9996,"90/3, Shiva Krupa, Opposite Syndicate Bank, Bi...",Northern Bites,No,No,NEW,0,Bannerghatta Road,Quick Bites,"Chinese, North Indian, Biryani",350,[],[],Delivery,BTM
9997,"4, 1st Cross, Christ School Road, Bhavani Layo...",Cafe Arabica,No,No,3.8/5,69,Bannerghatta Road,"Cafe, Bakery","Cafe, Bakery",700,"[('Rated 3.0', 'RATED\n Went to have dessert....",[],Delivery,BTM
9998,"Venkatapura Main Road, 12th Cross Road, Near G...",Swa-Desh A Family Restaurant,No,No,,0,Koramangala 1st Block,Quick Bites,North Indian,300,"[('Rated 5.0', 'RATED\n This restaurant was v...",[],Delivery,BTM


In [9]:
#Remove the NaN values from the dataset
zomato.isnull().sum()
zomato.dropna(how='any',inplace=True)

In [10]:
zomato.isnull().sum()

address                        0
name                           0
online_order                   0
book_table                     0
rate                           0
votes                          0
location                       0
rest_type                      0
cuisines                       0
approx_cost(for two people)    0
reviews_list                   0
menu_item                      0
listed_in(type)                0
listed_in(city)                0
dtype: int64

In [11]:
zomato

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,Banashankari,Quick Bites,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,Basavanagudi,Casual Dining,"North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9993,"37/3, 2nd Main, Roopena Agrahara, Behind TVS S...",Star Kitchen,No,No,3.3/5,9,Bommanahalli,Quick Bites,"Chinese, North Indian, Biryani",150,[],[],Delivery,BTM
9994,"Arjun Aura Apartment, Vijaya Bank Layout, Bann...",Flavorsome Bakes,No,No,4.0/5,93,Bannerghatta Road,"Takeaway, Delivery","Bakery, Desserts",800,"[('Rated 5.0', 'RATED\n I ordered a chocolate...",[],Delivery,BTM
9995,"199/C, 1st Stage, Mahaganapathi Nagar, Basaves...",Sree Banashankari Donne Biriyani,Yes,No,NEW,0,Rajajinagar,Quick Bites,"North Indian, Kebab",250,[],"['Biryani Rice', 'Mutton Biryani', 'Chicken Le...",Delivery,BTM
9996,"90/3, Shiva Krupa, Opposite Syndicate Bank, Bi...",Northern Bites,No,No,NEW,0,Bannerghatta Road,Quick Bites,"Chinese, North Indian, Biryani",350,[],[],Delivery,BTM


In [12]:
#Reading Column Names
zomato.columns

Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'location', 'rest_type', 'cuisines', 'approx_cost(for two people)',
       'reviews_list', 'menu_item', 'listed_in(type)', 'listed_in(city)'],
      dtype='object')

In [13]:
#Changing the column names
zomato = zomato.rename(columns={'approx_cost(for two people)':'cost','listed_in(type)':'type',
                                  'listed_in(city)':'city'})
zomato.columns

Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'location', 'rest_type', 'cuisines', 'cost', 'reviews_list',
       'menu_item', 'type', 'city'],
      dtype='object')

In [14]:
zomato.head()

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,cost,reviews_list,menu_item,type,city
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,Banashankari,Quick Bites,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,Basavanagudi,Casual Dining,"North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [15]:
zomato.cost

0       800
1       800
2       800
3       300
4       600
       ... 
9993    150
9994    800
9995    250
9996    350
9997    700
Name: cost, Length: 8568, dtype: object

In [16]:
#Some Transformations
zomato['cost'] = zomato['cost'].astype(str) #Changing the cost to string
zomato['cost'] = zomato['cost'].apply(lambda x: x.replace(',','.')) #Using lambda function to replace ',' from cost
zomato['cost'] = zomato['cost'].astype(float) # Changing the cost to Float

In [17]:
zomato.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8568 entries, 0 to 9997
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   address       8568 non-null   object 
 1   name          8568 non-null   object 
 2   online_order  8568 non-null   object 
 3   book_table    8568 non-null   object 
 4   rate          8568 non-null   object 
 5   votes         8568 non-null   int64  
 6   location      8568 non-null   object 
 7   rest_type     8568 non-null   object 
 8   cuisines      8568 non-null   object 
 9   cost          8568 non-null   float64
 10  reviews_list  8568 non-null   object 
 11  menu_item     8568 non-null   object 
 12  type          8568 non-null   object 
 13  city          8568 non-null   object 
dtypes: float64(1), int64(1), object(12)
memory usage: 1004.1+ KB


In [18]:
zomato.cost.unique()

array([800.  , 300.  , 600.  , 700.  , 550.  , 500.  , 450.  , 650.  ,
       400.  , 900.  , 200.  , 750.  , 150.  , 850.  , 100.  ,   1.2 ,
       350.  , 250.  , 950.  ,   1.  ,   1.5 ,   1.3 , 199.  ,  80.  ,
         1.1 , 160.  ,   1.6 , 230.  , 130.  ,   1.7 ,   1.4 ,   1.35,
         2.2 ,   2.  ,   1.8 ,   1.9 , 180.  , 330.  ,   2.5 ,   2.1 ,
         3.  ,   2.8 ,   3.4 ,  50.  ,  40.  ,   1.25,   3.5 ,   4.  ,
         2.4 ,   2.6 ,   1.45])

In [19]:
#Reading Rate of dataset
zomato['rate'].unique()

array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
       '3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
       '4.3/5', 'NEW', '2.9/5', '3.5/5', '2.6/5', '3.8 /5', '3.4/5',
       '4.5/5', '2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5',
       '3.4 /5', '-', '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5',
       '4.1 /5', '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5',
       '3.5 /5', '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5',
       '4.3 /5', '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5'],
      dtype=object)

In [22]:
#Removing '/5' from Rates
zomato = zomato.loc[zomato.rate !='NEW']
zomato = zomato.loc[zomato.rate !='-'].reset_index(drop=True)
remove_slash = lambda x: x.replace('/5', '') if type(x) == np.str else x
zomato.rate = zomato.rate.apply(remove_slash).astype('str').astype('float')

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  remove_slash = lambda x: x.replace('/5', '') if type(x) == np.str else x


In [23]:
zomato['rate'].head(10)

0    4.1
1    4.1
2    3.8
3    3.7
4    3.8
5    3.8
6    3.6
7    4.6
8    4.0
9    4.2
Name: rate, dtype: float64

In [24]:
# Adjust the column names
zomato.name = zomato.name.apply(lambda x:x.title())
zomato.online_order.replace(('Yes','No'),(True, False),inplace=True)
zomato.book_table.replace(('Yes','No'),(True, False),inplace=True)

In [26]:
zomato.head()

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,cost,reviews_list,menu_item,type,city
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,True,True,4.1,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800.0,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,True,False,4.1,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800.0,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,True,False,3.8,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800.0,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,False,False,3.7,88,Banashankari,Quick Bites,"South Indian, North Indian",300.0,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,False,False,3.8,166,Basavanagudi,Casual Dining,"North Indian, Rajasthani",600.0,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [27]:
zomato['city'].unique()

array(['Banashankari', 'Bannerghatta Road', 'Basavanagudi', 'Bellandur',
       'Brigade Road', 'Brookefield', 'BTM'], dtype=object)

In [28]:
zomato.head()

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,cost,reviews_list,menu_item,type,city
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,True,True,4.1,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800.0,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,True,False,4.1,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800.0,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,True,False,3.8,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800.0,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,False,False,3.7,88,Banashankari,Quick Bites,"South Indian, North Indian",300.0,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,False,False,3.8,166,Basavanagudi,Casual Dining,"North Indian, Rajasthani",600.0,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [29]:
## Checking Null values
zomato.isnull().sum()

address         0
name            0
online_order    0
book_table      0
rate            0
votes           0
location        0
rest_type       0
cuisines        0
cost            0
reviews_list    0
menu_item       0
type            0
city            0
dtype: int64

In [30]:
## Computing Mean Rating
restaurants = list(zomato['name'].unique())
zomato['Mean Rating'] = 0

for i in range(len(restaurants)):
    zomato['Mean Rating'][zomato['name'] == restaurants[i]] = zomato['rate'][zomato['name'] == restaurants[i]].mean()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  zomato['Mean Rating'][zomato['name'] == restaurants[i]] = zomato['rate'][zomato['name'] == restaurants[i]].mean()


In [31]:
len(restaurants)

3619

In [32]:
zomato.head()

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,cost,reviews_list,menu_item,type,city,Mean Rating
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,True,True,4.1,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800.0,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari,4.114286
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,True,False,4.1,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800.0,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari,4.1
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,True,False,3.8,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800.0,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari,3.8
3,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,False,False,3.7,88,Banashankari,Quick Bites,"South Indian, North Indian",300.0,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari,3.7
4,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,False,False,3.8,166,Basavanagudi,Casual Dining,"North Indian, Rajasthani",600.0,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari,3.8


In [33]:
zomato.sample(10)

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,cost,reviews_list,menu_item,type,city,Mean Rating
149,"Opposite BNM School, Near BDA complex, Banasha...",Chilli Flakes,True,False,3.6,46,Banashankari,Quick Bites,"Fast Food, Italian, Beverages",500.0,"[('Rated 2.0', ""RATED\n This was Zomato Deliv...",[],Delivery,Banashankari,3.6
4500,"949, 3rd Floor, 12th Main, Off 100 Feet Road, ...",The Humming Tree,True,False,4.0,2418,Indiranagar,Lounge,"Fast Food, European",1.4,"[('Rated 1.0', ""RATED\n Well if Zomato had ne...",[],Delivery,Brigade Road,4.0
234,"43/2, 100 Feet Road, Kamakya Complex, 3rd Stag...",New Prashanth Hotel,True,False,3.9,388,Banashankari,Casual Dining,"Biryani, North Indian, Seafood, Andhra",650.0,"[('Rated 4.0', ""RATED\n This place is one of ...",[],Delivery,Banashankari,3.538462
4995,"Hyatt Centric MG Road Banaglore, 1/1, Swami Vi...",The Bengaluru Brasserie - Hyatt Centric Mg Road,False,True,4.1,117,Ulsoor,Casual Dining,"Continental, Asian, North Indian, South Indian",2.0,"[('Rated 4.0', ""RATED\n A friend suggested th...",[],Dine-out,Brigade Road,4.1
7553,"99, 2nd Floor, ARD Complex, 5th Cross, Koraman...",China Bowl,True,False,3.9,455,Koramangala 6th Block,Casual Dining,Chinese,1.0,"[('Rated 4.0', 'RATED\n Hey CB ?\nI ordered t...",[],Delivery,BTM,3.9
5493,"G2-32, Inorbit Mall, EPIP Area",The Chocolate Heaven,True,False,3.1,175,Whitefield,"Dessert Parlor, Quick Bites","Desserts, Cafe",600.0,"[('Rated 3.0', ""RATED\n Ambience 3.5/5\nServi...","['Brownie Sundae', 'Chocolate Sandwich', 'Nute...",Cafes,Brookefield,3.1
242,"1st Main Road, Near Katte Balaga, NR Colony, B...",South Kitchen,True,False,4.3,275,Basavanagudi,Quick Bites,South Indian,100.0,"[('Rated 5.0', ""RATED\n This is again a IDLY-...","['Idli [2 Pieces]', 'Vada [2 Pieces]', 'Savige...",Delivery,Banashankari,4.3
19,"47, 48 &49, 3Rd Floor, 21st Main Road, 2nd Sta...",360 Atoms Restaurant And Cafe,True,False,3.1,13,Banashankari,Cafe,"Cafe, Chinese, Continental, Italian",400.0,"[('Rated 5.0', 'RATED\n Friendly staffs , nic...",[],Cafes,Banashankari,3.1
2333,"22/23, 7th Main, 7th Cross, Near Jayanti Garde...",Ice Land,True,False,3.6,34,JP Nagar,Dessert Parlor,"Ice Cream, Desserts",300.0,"[('Rated 5.0', 'RATED\n Amazing ??'), ('Rated...",[],Delivery,Basavanagudi,3.6
6196,"35/1B, Near Village Market, Marathahalli, Bang...",Daily Bread,False,False,3.6,18,Marathahalli,"Bakery, Quick Bites","Bakery, Fast Food, Italian",600.0,"[('Rated 3.0', 'RATED\n I have tried ? bread ...",[],Desserts,Brookefield,3.333333


In [34]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range = (1,5))

zomato[['Mean Rating']] = scaler.fit_transform(zomato[['Mean Rating']]).round(2)

In [35]:
zomato.head()

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,cost,reviews_list,menu_item,type,city,Mean Rating
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,True,True,4.1,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800.0,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari,3.99
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,True,False,4.1,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800.0,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari,3.97
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,True,False,3.8,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800.0,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari,3.58
3,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,False,False,3.7,88,Banashankari,Quick Bites,"South Indian, North Indian",300.0,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari,3.45
4,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,False,False,3.8,166,Basavanagudi,Casual Dining,"North Indian, Rajasthani",600.0,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari,3.58


In [36]:
## Text Preprocessing

In [37]:
from nltk.corpus import stopwords
stopwords.words('english')
print(stopwords.words('english'))

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

Some of the common text preprocessing / cleaning steps are:

 - Lower casing
 - Removal of Punctuations
 - Removal of Stopwords
 - Removal of URLs
 - Spelling correction

In [38]:
# 5 examples of these columns before text processing:
zomato[['reviews_list', 'cuisines']].sample(5)

Unnamed: 0,reviews_list,cuisines
2085,"[('Rated 1.0', 'RATED\n The food was prepared...",Street Food
2056,"[('Rated 1.0', 'RATED\n Food orders here was ...","North Indian, Chinese"
1220,"[('Rated 4.0', 'RATED\n This is the authentic...","Beverages, Ice Cream"
5330,"[('Rated 4.0', 'RATED\n What a great place ! ...",Modern Indian
5556,"[('Rated 5.0', 'RATED\n A must visit place to...","Chinese, Momos"


In [39]:
## Lower Casing
zomato["reviews_list"] = zomato["reviews_list"].str.lower()
zomato[['reviews_list', 'cuisines']].sample(5)

Unnamed: 0,reviews_list,cuisines
1332,"[('rated 3.0', 'rated\n i head that this plac...","North Indian, Chinese"
3194,"[('rated 1.0', 'rated\n expected it to be muc...",Fast Food
5390,"[('rated 4.0', ""rated\n for bengali cuisine i...","Bengali, Seafood"
1237,"[('rated 4.0', ""rated\n the packing is amazin...",Bakery
1475,"[('rated 3.0', ""rated\n baskij robbins is a p...","Ice Cream, Desserts"


In [40]:
## Removal of Puctuations
import string
PUNCT_TO_REMOVE = string.punctuation
def remove_punctuation(text):
    
    return text.translate(str.maketrans('', '', PUNCT_TO_REMOVE))

zomato["reviews_list"] = zomato["reviews_list"].apply(lambda text: remove_punctuation(text))
zomato[['reviews_list', 'cuisines']].sample(5)

Unnamed: 0,reviews_list,cuisines
2599,rated 10 ratedn bad taste not so great qualit...,Bakery
1212,rated 10 ratedn firstly v ordered the food ho...,Chinese
141,rated 10 ratedn i packed a full meal and in t...,South Indian
2803,rated 40 ratedn easy to locatenvfm 355ntaste ...,"Cafe, Mexican, Italian, Momos, Beverages"
4037,rated 40 ratedn glens bakehouse ask anyone wh...,"Bakery, Cafe, Italian, Desserts"


In [41]:
## Removal of Stopwords
from nltk.corpus import stopwords
STOPWORDS = set(stopwords.words('english'))
def remove_stopwords(text):
    
    return " ".join([word for word in str(text).split() if word not in STOPWORDS])

zomato["reviews_list"] = zomato["reviews_list"].apply(lambda text: remove_stopwords(text))

In [42]:
zomato[['reviews_list', 'cuisines']].sample(5)

Unnamed: 0,reviews_list,cuisines
766,rated 20 ratedn unhygienic place chicken taste...,"Pizza, Cafe, Italian"
3775,rated 30 ratedn ambience service good burger s...,"Burger, Fast Food, Beverages"
7798,rated 40 ratedn chicken fried rice amazing tho...,"North Indian, Chinese, South Indian"
5525,rated 40 ratedn restaurant suggested one close...,"North Indian, Arabian, Mughlai, Biryani, Seafo..."
1810,rated 20 ratedn ordered chilli cheese sandwich...,"Cafe, Sandwich, North Indian, Beverages, Desserts"


In [43]:
## Removal of URLS
def remove_urls(text):
    url_pattern = re.compile(r'https?://\S+|www\.\S+')
    return url_pattern.sub(r'', text)

zomato["reviews_list"] = zomato["reviews_list"].apply(lambda text: remove_urls(text))

In [44]:
zomato[['reviews_list', 'cuisines']].sample(5)

Unnamed: 0,reviews_list,cuisines
5889,rated 40 ratedn mega bite located whitefield g...,"North Indian, Chinese, South Indian, Continental"
2273,rated 30 ratedn decent food nothing extraordin...,"Chinese, Momos, Thai, Vietnamese"
1669,rated 30 ratedn decent economic place sattu ka...,"Chinese, North Indian, Street Food"
6028,rated 20 ratedn ordered combo place swiggy del...,"North Indian, South Indian, Chinese"
5070,rated 20 ratedn visited place tuesday evening ...,"Cafe, Fast Food"


In [45]:
# RESTAURANT NAMES:
restaurant_names = list(zomato['name'].unique())
restaurant_names

['Jalsa',
 'Spice Elephant',
 'San Churro Cafe',
 'Addhuri Udupi Bhojana',
 'Grand Village',
 'Timepass Dinner',
 'Rosewood International Hotel - Bar & Restaurant',
 'Onesta',
 'Penthouse Cafe',
 'Smacznego',
 'Cafã\x83Â\x83Ã\x82Â\x83Ã\x83Â\x82Ã\x82Â\x83Ã\x83Â\x83Ã\x82Â\x82Ã\x83Â\x82Ã\x82Â© Down The Alley',
 'Cafe Shuffle',
 'The Coffee Shack',
 'Caf-Eleven',
 'Cafe Vivacity',
 'Catch-Up-Ino',
 "Kirthi'S Biryani",
 'T3H Cafe',
 '360 Atoms Restaurant And Cafe',
 'The Vintage Cafe',
 'Woodee Pizza',
 'Cafe Coffee Day',
 'My Tea House',
 'Hide Out Cafe',
 'Cafe Nova',
 'Coffee Tindi',
 'Sea Green Cafe',
 'Cuppa',
 "Srinathji'S Cafe",
 'Redberrys',
 'Foodiction',
 'Sweet Truth',
 'Ovenstory Pizza',
 'Faasos',
 'Behrouz Biryani',
 'Fast And Fresh',
 'Szechuan Dragon',
 'Empire Restaurant',
 'Maruthi Davangere Benne Dosa',
 'Chaatimes',
 'Havyaka Mess',
 "Mcdonald'S",
 "Domino'S Pizza",
 'Hotboxit',
 'Kitchen Garden',
 'Recipe',
 'Beijing Bites',
 'Tasty Bytes',
 'Petoo',
 'Shree Cool Point'

In [46]:
def get_top_words(column, top_nu_of_words):
    
    vec = CountVectorizer(stop_words='english')
    
    bag_of_words = vec.fit_transform(column)
    
    sum_words = bag_of_words.sum(axis=0)
    
    words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()]
    
    words_freq =sorted(words_freq, key = lambda x: x[1], reverse=True)
    
    return words_freq[:top_nu_of_words]

In [47]:
zomato.head()

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,cost,reviews_list,menu_item,type,city,Mean Rating
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,True,True,4.1,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800.0,rated 40 ratedn beautiful place dine inthe int...,[],Buffet,Banashankari,3.99
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,True,False,4.1,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800.0,rated 40 ratedn dinner family turned good choo...,[],Buffet,Banashankari,3.97
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,True,False,3.8,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800.0,rated 30 ratedn ambience good enough pocket fr...,[],Buffet,Banashankari,3.58
3,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,False,False,3.7,88,Banashankari,Quick Bites,"South Indian, North Indian",300.0,rated 40 ratedn great food proper karnataka st...,[],Buffet,Banashankari,3.45
4,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,False,False,3.8,166,Basavanagudi,Casual Dining,"North Indian, Rajasthani",600.0,rated 40 ratedn good restaurant neighbourhood ...,[],Buffet,Banashankari,3.58


In [48]:
zomato.sample(5)

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,cuisines,cost,reviews_list,menu_item,type,city,Mean Rating
3620,"49, First Floor, Shubh Enclave, Haralur Main R...",Papacream,False,False,3.8,28,Sarjapur Road,Dessert Parlor,"Desserts, Ice Cream",350.0,rated 40 ratedn excited try bubble waffles fir...,[],Desserts,Bellandur,3.5
7782,"#3, 2nd main, 11th Cross, Maruthinagar, Madiwa...",Parivaar,False,False,3.3,7,BTM,Delivery,"Biryani, North Indian, Chinese",600.0,rated 10 ratedn worst food ive ever hadntook a...,[],Delivery,BTM,2.94
3600,"Chavalkere Road, Bellandur, Bangalore",Oz Pies,True,False,3.8,37,Bellandur,Delivery,"Australian, Continental",400.0,rated 50 ratedn place accidental discovery ube...,[],Desserts,Bellandur,3.58
2763,"56/1, Sri Krishna Towers, 9th Main Road, 5th B...",Thyme & Whisk,True,False,4.2,109,Jayanagar,Casual Dining,"Asian, Chinese, Continental, Italian",800.0,rated 40 ratedn school besties reunion place r...,[],Dine-out,Basavanagudi,4.1
7438,"969, 1st Main, Koramangala 1st Block, Bangalore",Habibi Falafel,True,False,4.3,219,Koramangala 1st Block,Quick Bites,"Lebanese, Middle Eastern",300.0,rated 50 ratedn place go againnusps affordable...,[],Delivery,BTM,4.23


In [49]:
zomato.shape

(8015, 15)

In [50]:
zomato.columns

Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'location', 'rest_type', 'cuisines', 'cost', 'reviews_list',
       'menu_item', 'type', 'city', 'Mean Rating'],
      dtype='object')

In [51]:
zomato=zomato.drop(['address','rest_type', 'type', 'menu_item', 'votes'],axis=1) #remove unnessary columns dataset

In [52]:
# Randomly sample 50% of your dataframe
df_percent = zomato.sample(frac=0.5)

In [53]:
df_percent

Unnamed: 0,name,online_order,book_table,rate,location,cuisines,cost,reviews_list,city,Mean Rating
2402,Namma Kudla,True,False,3.9,JP Nagar,"Mangalorean, North Indian, Chinese, Seafood",400.0,rated 40 ratedn small cozy karnataka coastal r...,Basavanagudi,3.71
1950,Cafe Cosy,False,True,4.1,Jayanagar,"Cafe, Chinese, Continental, Fast Food",800.0,rated 50 ratedn ambience great hookah flavour ...,Basavanagudi,3.97
1151,Desi Dhaba,True,False,3.3,BTM,"North Indian, Chinese",400.0,rated 10 ratedn much rice three people eat les...,Bannerghatta Road,2.94
3773,Just Bake,False,False,3.7,Bellandur,"Bakery, Desserts",400.0,rated 40 ratedn order cake friends birthdays m...,Bellandur,3.07
5272,Kamat Restaurant,True,False,3.6,City Market,"South Indian, North Indian, Chinese, Desserts,...",400.0,rated 10 ratedn one worst hotel food life got ...,Brigade Road,3.32
...,...,...,...,...,...,...,...,...,...,...
1132,The Chervil,True,False,4.2,JP Nagar,"North Indian, Continental",1.0,rated 10 ratedn food good experience restauran...,Bannerghatta Road,4.10
6444,Food Square,True,False,3.6,Marathahalli,"Fast Food, Chinese",200.0,rated 10 ratedn worst food ever tried life chi...,Brookefield,3.32
1629,New Kabab Zone,True,False,3.5,Bannerghatta Road,"North Indian, Chinese, Biryani, Rolls",500.0,rated 50 ratedn ordered form couple time nownn...,Bannerghatta Road,3.32
4367,Brownie Heaven,True,False,4.2,Cunningham Road,"Desserts, Fast Food",300.0,rated 40 ratedn wanted something sweet search ...,Brigade Road,3.90


### Term Frequency-Inverse Document Frequency

In [54]:
df_percent.set_index('name', inplace=True)

In [55]:
df_percent

Unnamed: 0_level_0,online_order,book_table,rate,location,cuisines,cost,reviews_list,city,Mean Rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Namma Kudla,True,False,3.9,JP Nagar,"Mangalorean, North Indian, Chinese, Seafood",400.0,rated 40 ratedn small cozy karnataka coastal r...,Basavanagudi,3.71
Cafe Cosy,False,True,4.1,Jayanagar,"Cafe, Chinese, Continental, Fast Food",800.0,rated 50 ratedn ambience great hookah flavour ...,Basavanagudi,3.97
Desi Dhaba,True,False,3.3,BTM,"North Indian, Chinese",400.0,rated 10 ratedn much rice three people eat les...,Bannerghatta Road,2.94
Just Bake,False,False,3.7,Bellandur,"Bakery, Desserts",400.0,rated 40 ratedn order cake friends birthdays m...,Bellandur,3.07
Kamat Restaurant,True,False,3.6,City Market,"South Indian, North Indian, Chinese, Desserts,...",400.0,rated 10 ratedn one worst hotel food life got ...,Brigade Road,3.32
...,...,...,...,...,...,...,...,...,...
The Chervil,True,False,4.2,JP Nagar,"North Indian, Continental",1.0,rated 10 ratedn food good experience restauran...,Bannerghatta Road,4.10
Food Square,True,False,3.6,Marathahalli,"Fast Food, Chinese",200.0,rated 10 ratedn worst food ever tried life chi...,Brookefield,3.32
New Kabab Zone,True,False,3.5,Bannerghatta Road,"North Indian, Chinese, Biryani, Rolls",500.0,rated 50 ratedn ordered form couple time nownn...,Bannerghatta Road,3.32
Brownie Heaven,True,False,4.2,Cunningham Road,"Desserts, Fast Food",300.0,rated 40 ratedn wanted something sweet search ...,Brigade Road,3.90


In [56]:
indices = pd.Series(df_percent.index)

In [57]:
indices

0            Namma Kudla
1              Cafe Cosy
2             Desi Dhaba
3              Just Bake
4       Kamat Restaurant
              ...       
4003         The Chervil
4004         Food Square
4005      New Kabab Zone
4006      Brownie Heaven
4007        Mayura Grand
Name: name, Length: 4008, dtype: object

In [58]:
# Creating tf-idf matrix
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_percent['reviews_list'])

In [59]:
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

In [60]:
len(cosine_similarities)

4008

In [61]:
a=[1,2,3,4,5,6,7,7,8,8,9]
a[0:6]

[1, 2, 3, 4, 5, 6]

In [62]:
def recommend(name, cosine_similarities = cosine_similarities):
    
    # Create a list to put top 10 restaurants
    recommend_restaurant = []
    
    # Find the index of the hotel entered
    idx = indices[indices == name].index[0]
    
    # Find the restaurants with a similar cosine-sim value and order them from bigges number
    score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending=False)
    
    # Extract top 30 restaurant indexes with a similar cosine-sim value
    top30_indexes = list(score_series.iloc[0:31].index)
    
    # Names of the top 30 restaurants
    for each in top30_indexes:
        recommend_restaurant.append(list(df_percent.index)[each])
    
    # Creating the new data set to show similar restaurants
    df_new = pd.DataFrame(columns=['cuisines', 'Mean Rating', 'cost'])
    
    # Create the top 30 similar restaurants with some of their columns
    for each in recommend_restaurant:
        df_new = df_new.append(pd.DataFrame(df_percent[['cuisines','Mean Rating', 'cost']][df_percent.index == each].sample()))
    
    # Drop the same named restaurants and sort only the top 10 by the highest rating
    df_new = df_new.drop_duplicates(subset=['cuisines','Mean Rating', 'cost'], keep=False)
    df_new = df_new.sort_values(by='Mean Rating', ascending=False).head()
    
    print('TOP %s RESTAURANTS LIKE %s WITH SIMILAR REVIEWS: ' % (str(len(df_new)), name))
    
    return df_new

In [63]:
# HERE IS A RANDOM RESTAURANT. LET'S SEE THE DETAILS ABOUT THIS RESTAURANT:
df_percent[df_percent.index == 'Pai Vihar'].head()

Unnamed: 0_level_0,online_order,book_table,rate,location,cuisines,cost,reviews_list,city,Mean Rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Pai Vihar,False,False,3.2,City Market,"South Indian, Street Food, Chinese, Fast Food",400.0,rated 20 ratedn food dry bland dont understand...,Brigade Road,2.55


In [64]:
recommend("Icebreakers")

TOP 5 RESTAURANTS LIKE Icebreakers WITH SIMILAR REVIEWS: 


Unnamed: 0,cuisines,Mean Rating,cost
Icebreakers,"Desserts, Ice Cream",4.23,400.0
Dice N Dine,"Continental, Cafe, Italian, Fast Food, Steak",4.23,900.0
The Marash,"Turkish, Desserts",4.23,250.0
Pabrai'S Fresh & Naturelle Icecreams,"Ice Cream, Desserts",4.14,180.0
Ice Cream Works,"Desserts, Ice Cream",3.97,300.0
