# Zomato hotel's recomondation using KNN model

In [89]:
#Importing the required libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import spatial
import operator
%matplotlib inline

In [90]:
#Reading the data from the file
data=pd.read_csv(r'C:\Users\pc\DataScience\KNN\Zomato\Zomato.csv')
data=data[0:1000]

## DATA PREPROCESSING

In [91]:
data=data.drop_duplicates()

In [92]:
#Dropping unnecessary columns
data=data.drop(columns={'url','address','reviews_list','menu_item','dish_liked','phone','listed_in(type)','listed_in(city)'})

In [93]:
#Renaming the columns
data=data.rename(columns={'name':'Name','online_order':'Online','book_table':'BookTable','rate':'Rate','votes':'Votes','location':'Location','rest_type':'RestType','cuisines':'Cuisines','approx_cost(for two people)':'Cost'})

In [94]:
data['index']=np.array(range(0,len(data)))
data.set_index('index')
data.loc[data.Name=='Jalsa']

Unnamed: 0,Name,Online,BookTable,Rate,Votes,Location,RestType,Cuisines,Cost,index
0,Jalsa,Yes,Yes,4.1/5,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800,0
456,Jalsa,Yes,Yes,4.1/5,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800,456
559,Jalsa,Yes,Yes,4.1/5,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800,559


In [95]:
#Handling the null values in Cuisines and Type
data.Cuisines=data.Cuisines.replace(np.nan,'UnknownCuisine')
data.RestType=data.RestType.replace(np.nan,'UnknownRestType')

## FEATURE ENGINEERING

In [68]:
#Check values of Location attribute
locList=list(data.Location.unique())

In [69]:
#Handling the Rest Type attribute
Type=list(data.RestType)
uniqueType=[]
for ele in Type:
    subList=ele.split(',')
    for eleSub in subList:
        eleSub=eleSub.strip()
        if eleSub not in uniqueType:
            uniqueType.append(eleSub)

In [70]:
#Handling the Rest Type attribute
Cuisines=list(data.Cuisines)
uniqueCuisines=[]
for ele in Cuisines:
    subList=ele.split(',')
    for eleSub in subList:
        eleSub=eleSub.strip()
        if eleSub not in uniqueCuisines:
            uniqueCuisines.append(eleSub)

In [71]:
len(uniqueType)

20

In [72]:
#Creating the final dataset
df=pd.DataFrame()
df=data

In [73]:
#Dummy encoding the columns
df=df.drop(columns={'Location','RestType','Cuisines'})
for types in uniqueType:
    df[types]=np.nan
for cuisines in uniqueCuisines:
    df[cuisines]=np.nan

In [74]:
dfLoc=pd.get_dummies(data['Location'])

In [75]:
df=df.fillna(0)

In [76]:
#Handling the RestType Attributes
for index in data.index:
    ele=data.loc[index]['RestType']
    subList=ele.split(',')
    for eleSub in subList:
        eleSub=eleSub.strip()
        df[eleSub][index]=1
        

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [77]:
#Handling the RestType Attributes
for index in data.index:
    ele=data.loc[index]['Cuisines']
    subList=ele.split(',')
    for eleSub in subList:
        eleSub=eleSub.strip()
        df[eleSub][index]=1
        

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [78]:
df.BookTable=df.BookTable.eq('Yes').mul(1)
df.Online=df.Online.eq('Yes').mul(1)

In [79]:
df.Rate=df.Rate.replace('NEW','0/5')
df.Rate=df.Rate.replace(0,'0/5')


In [80]:
for index in df.index:
    df['Rate'][index]=float(df['Rate'][index].split("/")[0])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [81]:
df.Cost=df.Cost.apply(lambda x: x.replace(',',''))

In [82]:
df.Cost=df.Cost.astype('int')

In [83]:
#Final featured and preprocessed dataset
df=pd.concat([df,dfLoc],axis=1)

In [84]:
df.Rate=df.Rate.astype('float')

In [85]:
# Function to calculate distances between movies
def ComputeDistance(a, b):
    OnlineA= a[1]
    OnlineB = b[1]
    BookTableA = a[2]
    BookTableB = b[2]
    RateA = a[3]
    RateB= b[3]
    VotesA= a[4]
    VotesB = b[4]
    CostA=a[5]
    CostB=b[5]
    RestTypeA=a[6:26]
    RestTypeB=b[6:26]
    CuisineA=a[27:85]
    CuisineB=b[27:85]
    LocationA=a[86:]
    LocationB=b[86:]
    Online=abs(OnlineA-OnlineB)
    BookTable=abs(BookTableA-BookTableB)
    Rate=abs(RateA-RateB)
    Votes=abs(VotesA-VotesB)
    Cost=abs(CostA-CostB)
    RestType=spatial.distance.cosine(RestTypeA,RestTypeB)
    Cuisine=spatial.distance.cosine(CuisineA,CuisineB)
    Location=spatial.distance.cosine(LocationA,LocationB)
    return Online + BookTable + Rate + Votes + Cost + RestType + Cuisine + Location

In [86]:
b=(list(df.loc[1]))
a=['Jalsa', 1, 1, 4.1, 775, 800, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
print(a[86:])
type(a)
print(ComputeDistance(a, b))

[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


ValueError: operands could not be broadcast together with shapes (14,) (15,) 

In [None]:

def getNeighbors(a, K):
    distances = []
    for index in df.index:
        dist = ComputeDistance(list(a), list(df.loc[index]))
        distances.append((df['Name'][index], dist))
    distances.sort(key=operator.itemgetter(1))
    neighbors = []
    for x in range(K):
        neighbors.append(distances[x][0])
    return neighbors


In [None]:
K = 10
neighbors = getNeighbors(a, K)
print("10 Neighbors:")
for neighbor in neighbors:
    print(data.Name[neighbor])