### Zomato Recommendation System

In [1]:
# Importing Libraries

import numpy as np
import pandas as pd

#### Loading the dataset

In [2]:
df = pd.read_csv("zomato.csv")

In [3]:
print(df.shape)
df.head()

(51717, 17)


Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,https://www.zomato.com/bangalore/grand-village...,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,+91 8026612447\r\n+91 9901210005,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 17 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   url                          51717 non-null  object
 1   address                      51717 non-null  object
 2   name                         51717 non-null  object
 3   online_order                 51717 non-null  object
 4   book_table                   51717 non-null  object
 5   rate                         43942 non-null  object
 6   votes                        51717 non-null  int64 
 7   phone                        50509 non-null  object
 8   location                     51696 non-null  object
 9   rest_type                    51490 non-null  object
 10  dish_liked                   23639 non-null  object
 11  cuisines                     51672 non-null  object
 12  approx_cost(for two people)  51371 non-null  object
 13  reviews_list                 51

#### Data Cleaning and Feature Engineering

In [5]:
# Deleting Columns which are not necessary
data = df.drop(["url", "address", "dish_liked", "phone", "menu_item", "reviews_list","listed_in(type)","listed_in(city)"], axis=1)

In [6]:
# Removing the Duplicates
print(data.duplicated().sum())
data = data.drop_duplicates()

28755


In [7]:
# Removing the NULL values
print(data.isnull().sum())
data = data.dropna(how="any")

name                              0
online_order                      0
book_table                        0
rate                           2724
votes                             0
location                         11
rest_type                        99
cuisines                         29
approx_cost(for two people)     143
dtype: int64


In [8]:
# Rename Columns
data.rename(columns={'approx_cost(for two people)':'cost'},inplace=True)
data.columns

Index(['name', 'online_order', 'book_table', 'rate', 'votes', 'location',
       'rest_type', 'cuisines', 'cost'],
      dtype='object')

In [9]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20064 entries, 0 to 51677
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   name          20064 non-null  object
 1   online_order  20064 non-null  object
 2   book_table    20064 non-null  object
 3   rate          20064 non-null  object
 4   votes         20064 non-null  int64 
 5   location      20064 non-null  object
 6   rest_type     20064 non-null  object
 7   cuisines      20064 non-null  object
 8   cost          20064 non-null  object
dtypes: int64(1), object(8)
memory usage: 1.5+ MB


#### Transformations

In [10]:
# Cost
data["cost"] = data["cost"].apply(lambda x : x.replace(",","")).astype(np.float32)

In [11]:
# Rate
data["rate"] = data["rate"].apply(lambda x : x.replace("/5",""))
l1 = list(data[data["rate"] == "NEW"].index)
l2 = list(data[data["rate"] == '-'].index)
l1.extend(l2)
l = l1
data = data.drop(l)
data["rate"] = data["rate"].astype(np.float32)

In [12]:
# Online_Order and Book_Table
data["online_order"] = data["online_order"].replace(("Yes","No"),(True,False))
data["book_table"] = data["book_table"].replace(("Yes","No"),(True,False))

In [13]:
# Lowercase Conversion
data["location"] = data["location"].apply(lambda x : x.lower())
data["rest_type"] = data["rest_type"].apply(lambda x : x.lower())
data["cuisines"] = data["cuisines"].apply(lambda x : x.lower())

In [14]:
print("No of Null Values : ", data.isnull().sum().sum())
data.head()

No of Null Values :  0


Unnamed: 0,name,online_order,book_table,rate,votes,location,rest_type,cuisines,cost
0,Jalsa,True,True,4.1,775,banashankari,casual dining,"north indian, mughlai, chinese",800.0
1,Spice Elephant,True,False,4.1,787,banashankari,casual dining,"chinese, north indian, thai",800.0
2,San Churro Cafe,True,False,3.8,918,banashankari,"cafe, casual dining","cafe, mexican, italian",800.0
3,Addhuri Udupi Bhojana,False,False,3.7,88,banashankari,quick bites,"south indian, north indian",300.0
4,Grand Village,False,False,3.8,166,basavanagudi,casual dining,"north indian, rajasthani",600.0


### Recommendation System

In [15]:
df_filer = data.sample(frac = 0.25)
print(df_filer.shape)

(4784, 9)


In [16]:
df_filter = df_filer.set_index("name")

In [17]:
indices = pd.Series(df_filter.index)

In [18]:
indices.head()

0                      Maiyas
1                   Tastebuds
2    Mojo Pizza - 2X Toppings
3             Nandhana Palace
4                   Hot Brixx
Name: name, dtype: object

In [19]:
# Creating Tf-Idf matrix
from sklearn.feature_extraction.text import TfidfVectorizer

TFIDF = TfidfVectorizer(ngram_range=(1, 2), stop_words='english')
cuisine_matrix = TFIDF.fit_transform(df_filter["cuisines"])

In [20]:
cost_matrix = pd.concat([df_filter["cost"],df_filter["rate"]],axis=1).values

In [21]:
print("Cuisine ",cuisine_matrix.shape)
print("Cost and Rate ",cost_matrix.shape)

Cuisine  (4784, 924)
Cost and Rate  (4784, 2)


In [22]:
from scipy.sparse import hstack

matrix = hstack((cuisine_matrix,cost_matrix))

In [23]:
from sklearn.metrics.pairwise import cosine_similarity

cosine_similarities = cosine_similarity(matrix)

In [24]:
def recommend(name, n, cosine_similarities=cosine_similarities):
    restaurant = []
    
    # Index of the Restaurant
    idx = indices[indices == name].index[0]
    
    # Restaurants with similar score
    score = pd.Series(cosine_similarities[idx]).sort_values(ascending=False)
    
    # Top n Indexes
    top = list(score.iloc[0:n].index)
    
    # Covert Top to df
    top = df_filter.iloc[top]
    
    return top

In [25]:
df_filter.loc[indices[0]]

online_order                                                 True
book_table                                                  False
rate                                                          3.9
votes                                                         291
location                                             malleshwaram
rest_type                                     quick bites, bakery
cuisines        south indian, north indian, chinese, bakery, d...
cost                                                        300.0
Name: Maiyas, dtype: object

In [26]:
recommend(indices[0],20)

Unnamed: 0_level_0,online_order,book_table,rate,votes,location,rest_type,cuisines,cost
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Maiyas,True,False,3.9,291,malleshwaram,"quick bites, bakery","south indian, north indian, chinese, bakery, d...",300.0
Bhavani Restaurant,False,False,3.9,100,basavanagudi,quick bites,"south indian, north indian, chinese",300.0
Atithi Aramane Veg,True,False,3.9,49,bellandur,quick bites,"south indian, north indian, chinese",300.0
Upahara Mandira,True,False,3.9,24,hbr layout,quick bites,"south indian, north indian, chinese",300.0
Matru Sagar,True,False,4.0,332,kalyan nagar,quick bites,"south indian, north indian, chinese",300.0
Adithya,True,False,4.0,334,jp nagar,quick bites,"south indian, north indian, chinese",300.0
Adithya,True,False,4.0,332,jp nagar,quick bites,"south indian, north indian, chinese",300.0
Matru Sagar,True,False,4.1,327,kalyan nagar,quick bites,"south indian, north indian, chinese",300.0
Sri Ayodhya Veg,False,False,3.6,24,vasanth nagar,quick bites,"south indian, north indian, chinese",300.0
New Sagar Fast Food,False,False,3.6,21,rajajinagar,quick bites,"south indian, north indian, chinese",300.0
