In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# progress bar
from tqdm.notebook import tqdm

# dimensionality reduction (for visualization)
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# clustering
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import euclidean_distances
from scipy.spatial.distance import cdist


from sklearn.feature_extraction import text
from sklearn.metrics.pairwise import cosine_similarity

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the restaurant listing. A raw string keeps the path value identical to
# the original literal while avoiding the invalid "\c" and "\H" escape
# sequences, which newer Python versions warn about.
# NOTE(review): this is an absolute, machine-specific path — consider reading
# from a configurable data directory instead.
data = pd.read_csv(r'C:\Users\computer\HyderabadResturants.csv')

In [3]:
# Preview the freshly loaded frame (pandas elides the middle rows in the rendered output).
data

Unnamed: 0,links,names,ratings,cuisine,price for one
0,https://www.zomato.com/hyderabad/sahara-bakers...,Sahara Bakers,3.7,"Chinese, Bakery, Sichuan, Pizza, Burger",100
1,https://www.zomato.com/hyderabad/kfc-abids/order,KFC,3.9,"Burger, Fast Food, Biryani, Desserts, Beverages",100
2,https://www.zomato.com/hyderabad/subbaiah-gari...,Subbaiah Gari Hotel,4.1,"South Indian, Andhra, Mithai",100
3,https://www.zomato.com/hyderabad/paradise-biry...,Paradise Biryani,3.9,"Biryani, Kebab, Desserts, Beverages",100
4,https://www.zomato.com/hyderabad/pista-house-b...,Pista House Bakery,4.3,"Fast Food, Sandwich, Pizza, Burger, Wraps, Rol...",100
...,...,...,...,...,...
652,https://www.zomato.com/hyderabad/dr-cakes-banj...,Dr Cakes,3.2,"Bakery, Desserts",350
653,https://www.zomato.com/hyderabad/shahi-naan-am...,Shahi Naan,-,North Indian,350
654,https://www.zomato.com/hyderabad/combosthalam-...,Combosthalam By Phulkaas,3.8,"North Indian, Chinese",350
655,https://www.zomato.com/hyderabad/pachadis-by-p...,Pachadis By Phulkaas,-,South Indian,350


In [4]:
# Remove the URL column. Rebinding `data` with errors='ignore' (instead of an
# in-place drop) keeps this cell safe to re-run: a second execution no longer
# raises KeyError because 'links' is already gone.
data = data.drop(columns='links', errors='ignore')
data.head()

Unnamed: 0,names,ratings,cuisine,price for one
0,Sahara Bakers,3.7,"Chinese, Bakery, Sichuan, Pizza, Burger",100
1,KFC,3.9,"Burger, Fast Food, Biryani, Desserts, Beverages",100
2,Subbaiah Gari Hotel,4.1,"South Indian, Andhra, Mithai",100
3,Paradise Biryani,3.9,"Biryani, Kebab, Desserts, Beverages",100
4,Pista House Bakery,4.3,"Fast Food, Sandwich, Pizza, Burger, Wraps, Rol...",100


In [5]:
# Summarise column dtypes and non-null counts.
# NOTE(review): `ratings` is reported as object rather than numeric — presumably
# because of the "-" placeholders visible in the preview; confirm and convert
# before using it numerically.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 657 entries, 0 to 656
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   names          657 non-null    object
 1   ratings        657 non-null    object
 2   cuisine        657 non-null    object
 3   price for one  657 non-null    int64 
dtypes: int64(1), object(3)
memory usage: 20.7+ KB


In [6]:
# (rows, columns) of the frame at this point in the notebook.
data.shape

(657, 4)

In [7]:
# Expand the comma-separated `cuisine` string into eight positional columns
# (cuisine1 .. cuisine8); rows with fewer cuisines are padded with missing values.
data[["cuisine1", "cuisine2", 'cuisine3', 'cuisine4', 'cuisine5', 'cuisine6', 'cuisine7', 'cuisine8']] = (
    data["cuisine"]
    .str                                    # through the string accessor
    .split(",", expand=True)                # one column per comma-separated entry
    # The source strings are separated by ", ", so every entry after the first
    # would otherwise keep a leading space (" Bakery" != "Bakery").
    .apply(lambda part: part.str.strip())
)
# Rebind instead of renaming in place so the cell has no hidden side effects.
data = data.rename(columns={'price for one': 'price'})
data.head()

Unnamed: 0,names,ratings,cuisine,price,cuisine1,cuisine2,cuisine3,cuisine4,cuisine5,cuisine6,cuisine7,cuisine8
0,Sahara Bakers,3.7,"Chinese, Bakery, Sichuan, Pizza, Burger",100,Chinese,Bakery,Sichuan,Pizza,Burger,,,
1,KFC,3.9,"Burger, Fast Food, Biryani, Desserts, Beverages",100,Burger,Fast Food,Biryani,Desserts,Beverages,,,
2,Subbaiah Gari Hotel,4.1,"South Indian, Andhra, Mithai",100,South Indian,Andhra,Mithai,,,,,
3,Paradise Biryani,3.9,"Biryani, Kebab, Desserts, Beverages",100,Biryani,Kebab,Desserts,Beverages,,,,
4,Pista House Bakery,4.3,"Fast Food, Sandwich, Pizza, Burger, Wraps, Rol...",100,Fast Food,Sandwich,Pizza,Burger,Wraps,Rolls,Salad,Desserts


In [8]:
# Names of the positional cuisine columns: cuisine1 .. cuisine8.
features = [f"cuisine{slot}" for slot in range(1, 9)]

# `temp` holds the number of empty cuisine slots per restaurant; the number of
# cuisines actually listed is the slot count minus that.
empty_slots = data[features].isnull().sum(axis=1)
data["temp"] = empty_slots
data["no_Of_cusines"] = len(features) - data["temp"]
# data.drop(['temp','cuisine'],axis=1,inplace=True)
data.head()

Unnamed: 0,names,ratings,cuisine,price,cuisine1,cuisine2,cuisine3,cuisine4,cuisine5,cuisine6,cuisine7,cuisine8,temp,no_Of_cusines
0,Sahara Bakers,3.7,"Chinese, Bakery, Sichuan, Pizza, Burger",100,Chinese,Bakery,Sichuan,Pizza,Burger,,,,3,5
1,KFC,3.9,"Burger, Fast Food, Biryani, Desserts, Beverages",100,Burger,Fast Food,Biryani,Desserts,Beverages,,,,3,5
2,Subbaiah Gari Hotel,4.1,"South Indian, Andhra, Mithai",100,South Indian,Andhra,Mithai,,,,,,5,3
3,Paradise Biryani,3.9,"Biryani, Kebab, Desserts, Beverages",100,Biryani,Kebab,Desserts,Beverages,,,,,4,4
4,Pista House Bakery,4.3,"Fast Food, Sandwich, Pizza, Burger, Wraps, Rol...",100,Fast Food,Sandwich,Pizza,Burger,Wraps,Rolls,Salad,Desserts,0,8


In [9]:
# from sklearn.cluster import KMeans
# from sklearn.preprocessing import StandardScaler
# from sklearn.pipeline import Pipeline

# cluster_pipeline = Pipeline([('scaler', StandardScaler()), ('kmeans', KMeans(n_clusters=10))])
# X = genre_data.select_dtypes(np.number)
# cluster_pipeline.fit(X)
# genre_data['cluster'] = cluster_pipeline.predict(X)

In [11]:
# Represent each restaurant by the TF-IDF vector of its raw cuisine string and
# compare every pair of restaurants with cosine similarity.
feature = data["cuisine"].tolist()
# `input` selects HOW documents are supplied ('content', 'file' or 'filename'),
# not the documents themselves. Passing the list there is rejected by
# scikit-learn releases that validate parameters, so the default ('content')
# is used and the documents are handed to fit_transform only.
tfidf = text.TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(feature)
# Square matrix: entry [i, j] is the similarity between rows i and j of `data`.
similarity = cosine_similarity(tfidf_matrix)

In [12]:
# Lookup table: restaurant name -> row label in `data`.
indices = pd.Series(data.index, index=data['names'])
# Series.drop_duplicates() would de-duplicate the VALUES (row labels, which are
# already unique) and leave repeated names in the index. Filter on the index
# instead, keeping the first row for any name that occurs more than once, so
# that indices[name] always yields a single row label.
indices = indices[~indices.index.duplicated(keep='first')]

In [13]:
def restaurant_recommendation(name, similarity = similarity, top_n=10, include_self=True):
    """Return the names of the restaurants most similar to ``name``.

    Parameters
    ----------
    name : str
        Restaurant name to look up in the module-level ``indices`` table.
    similarity : array-like
        Pairwise similarity matrix indexed by row position; defaults to the
        matrix that exists when this function is defined.
    top_n : int, default 10
        Maximum number of names to return.
    include_self : bool, default True
        When True (the original behaviour) the queried restaurant is part of
        the ranking and normally comes first, since it is maximally similar
        to itself. Pass False to leave it out.

    Returns
    -------
    pandas.Series
        Entries of ``data['names']`` ordered from most to least similar;
        ties keep their original row order.

    Raises
    ------
    KeyError
        If ``name`` is not present in ``indices``.
    """
    if name not in indices.index:
        raise KeyError(f"Unknown restaurant name: {name!r}")

    row = indices[name]
    # A name that occurs several times makes the lookup return a Series of
    # rows; use the first match instead of failing later during sorting.
    if isinstance(row, pd.Series):
        row = row.iloc[0]

    # NOTE(review): the row label is used as a position into `similarity`;
    # this assumes `data` still has its default 0..n-1 index.
    scores = np.asarray(similarity[row])
    # Stable descending sort: equal scores stay in ascending row order, which
    # matches sorted(..., reverse=True) on the enumerated scores.
    ranked = np.argsort(-scores, kind="stable")
    if not include_self:
        ranked = ranked[ranked != row]
    return data['names'].iloc[ranked[:top_n]]

In [14]:
# Example query: show the top matches for one restaurant. The queried
# restaurant itself appears in the result, as the printed output shows.
print(restaurant_recommendation("Paradise Biryani"))

3                        Paradise Biryani
621             Potful - Claypot Biryanis
37                        Behrouz Biryani
303    Aarth - Authentic Claypot Biryanis
245                       Makhni Brothers
118                          Hotel Sohail
500                      Heavenly Biryani
43       Capital Multi Cuisine Restaurant
646                   Bhatthi By Phulkaas
122                     Redaan Restaurant
Name: names, dtype: object
