In [1]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from scipy.sparse import hstack

In [2]:
data = pd.read_csv('HyderabadResturants.csv')

In [3]:
data.head()

Unnamed: 0,links,names,ratings,cuisine,price for one
0,https://www.zomato.com/hyderabad/sahara-bakers...,Sahara Bakers,3.7,"Chinese, Bakery, Sichuan, Pizza, Burger",100
1,https://www.zomato.com/hyderabad/kfc-abids/order,KFC,3.9,"Burger, Fast Food, Biryani, Desserts, Beverages",100
2,https://www.zomato.com/hyderabad/subbaiah-gari...,Subbaiah Gari Hotel,4.1,"South Indian, Andhra, Mithai",100
3,https://www.zomato.com/hyderabad/paradise-biry...,Paradise Biryani,3.9,"Biryani, Kebab, Desserts, Beverages",100
4,https://www.zomato.com/hyderabad/pista-house-b...,Pista House Bakery,4.3,"Fast Food, Sandwich, Pizza, Burger, Wraps, Rol...",100


In [4]:
data.isna().sum()

links            0
names            0
ratings          0
cuisine          0
price for one    0
dtype: int64

In [5]:
data.describe()

Unnamed: 0,price for one
count,657.0
mean,169.406393
std,97.178712
min,50.0
25%,100.0
50%,150.0
75%,250.0
max,400.0


In [6]:
data['names'].value_counts()

Kwality Wall’s Frozen Dessert and Ice Cream Shop    3
Sri Balaji Mithai Bhandar                           3
LunchBox - Meals and Thalis                         3
Mahalaxmi Tiffin Centre                             2
Cakes King                                          2
                                                   ..
Ghrelin Cafe & Patisserie                           1
The Exotic Shawarma                                 1
Dessi Cuppa                                         1
K&C Bakers & Confectioners                          1
Tasim                                               1
Name: names, Length: 636, dtype: int64

In [7]:
data.shape

(657, 5)

##### Data Pre-processing

In [8]:
text_features = data['names']+ ' ' + data['cuisine']
print(text_features)

0      Sahara Bakers Chinese, Bakery, Sichuan, Pizza,...
1      KFC Burger, Fast Food, Biryani, Desserts, Beve...
2       Subbaiah Gari Hotel South Indian, Andhra, Mithai
3      Paradise Biryani Biryani, Kebab, Desserts, Bev...
4      Pista House Bakery Fast Food, Sandwich, Pizza,...
                             ...                        
652                            Dr Cakes Bakery, Desserts
653                              Shahi Naan North Indian
654       Combosthalam By Phulkaas North Indian, Chinese
655                    Pachadis By Phulkaas South Indian
656                                 Tasim Chinese, Momos
Length: 657, dtype: object


In [9]:
vectorizer = TfidfVectorizer()
text_vectors = vectorizer.fit_transform(text_features)
print(text_vectors)

  (0, 138)	0.3222813595158181
  (0, 599)	0.29221400612293974
  (0, 714)	0.34346796429976495
  (0, 78)	0.2719169001111391
  (0, 169)	0.23082766469944987
  (0, 77)	0.4508953967591267
  (0, 670)	0.6018709058085221
  (1, 98)	0.24188495931289938
  (1, 233)	0.23790207415492323
  (1, 111)	0.33236717614859995
  (1, 286)	0.20668238880334316
  (1, 275)	0.2503294237636857
  (1, 417)	0.7210819896062932
  (1, 138)	0.3861148324166041
  (2, 499)	0.28127869513133374
  (2, 48)	0.35207200094230157
  (2, 375)	0.1571182527682464
  (2, 735)	0.2016450188394592
  (2, 353)	0.3220085965933011
  (2, 309)	0.5603084893886573
  (2, 754)	0.5603084893886573
  (3, 414)	0.44279072544624787
  (3, 580)	0.587608225131558
  (3, 98)	0.21949879320923998
  (3, 233)	0.2158845193488473
  :	:
  (651, 752)	0.30590154697778493
  (651, 499)	0.3358784924895292
  (651, 286)	0.1917747314638084
  (652, 241)	0.7668365227556168
  (652, 146)	0.4774191989992748
  (652, 233)	0.25299758131653943
  (652, 78)	0.3464460703240058
  (653, 526)	0

In [10]:
numerical_features = data['price for one'].values.reshape(-1,1)
scaler = StandardScaler()
numerical_features_scaled = scaler.fit_transform(numerical_features)
print(numerical_features_scaled)

[[-0.71475813]
 [-0.71475813]
 [-0.71475813]
 [-0.71475813]
 [-0.71475813]
 [-0.71475813]
 [-0.71475813]
 [-0.71475813]
 [-0.71475813]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [ 2.37468983]
 [ 2.37468983]
 [ 2.37468983]
 [ 2.37468983]
 [ 2.37468983]
 [ 2.37468983]
 [ 2.37468983]
 [ 2.37468983]
 [ 2.37468983]
 [ 2.37468983]
 [ 2.37468983]
 [ 2.37468983]
 [-1.22966612]
 [-1.22966612]
 [-1.22966612]
 [-1.22966612]
 [-1.22966612]
 [-1.22966612]
 [-1.22966612]
 [-1.22966612]
 [-1.22966612]
 [-1.22966612]
 [-1.22966612]
 [-1.22966612]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985013]
 [-0.19985

##### Combining textual and numerical features

In [11]:
feature_matrix = hstack((text_vectors,numerical_features_scaled))
print(feature_matrix)

  (0, 138)	0.3222813595158181
  (0, 599)	0.29221400612293974
  (0, 714)	0.34346796429976495
  (0, 78)	0.2719169001111391
  (0, 169)	0.23082766469944987
  (0, 77)	0.4508953967591267
  (0, 670)	0.6018709058085221
  (1, 98)	0.24188495931289938
  (1, 233)	0.23790207415492323
  (1, 111)	0.33236717614859995
  (1, 286)	0.20668238880334316
  (1, 275)	0.2503294237636857
  (1, 417)	0.7210819896062932
  (1, 138)	0.3861148324166041
  (2, 499)	0.28127869513133374
  (2, 48)	0.35207200094230157
  (2, 375)	0.1571182527682464
  (2, 735)	0.2016450188394592
  (2, 353)	0.3220085965933011
  (2, 309)	0.5603084893886573
  (2, 754)	0.5603084893886573
  (3, 414)	0.44279072544624787
  (3, 580)	0.587608225131558
  (3, 98)	0.21949879320923998
  (3, 233)	0.2158845193488473
  :	:
  (632, 877)	1.8597818353629387
  (633, 877)	0.829965850673979
  (634, 877)	0.829965850673979
  (635, 877)	0.829965850673979
  (636, 877)	0.829965850673979
  (637, 877)	0.829965850673979
  (638, 877)	0.829965850673979
  (639, 877)	0.829965

##### Finding Similarity Score using cosine_similarity

In [12]:
similarity = cosine_similarity(feature_matrix) 
print(similarity)

[[ 1.          0.42049477  0.33813371 ... -0.49069955 -0.51215073
  -0.4854255 ]
 [ 0.42049477  1.          0.33813371 ... -0.51215073 -0.51215073
  -0.51215073]
 [ 0.33813371  0.33813371  1.         ... -0.50147482 -0.48302646
  -0.51215073]
 ...
 [-0.49069955 -0.51215073 -0.50147482 ...  1.          0.89008041
   0.79198033]
 [-0.51215073 -0.51215073 -0.48302646 ...  0.89008041  1.
   0.77572383]
 [-0.4854255  -0.51215073 -0.51215073 ...  0.79198033  0.77572383
   1.        ]]


In [13]:
data.head()

Unnamed: 0,links,names,ratings,cuisine,price for one
0,https://www.zomato.com/hyderabad/sahara-bakers...,Sahara Bakers,3.7,"Chinese, Bakery, Sichuan, Pizza, Burger",100
1,https://www.zomato.com/hyderabad/kfc-abids/order,KFC,3.9,"Burger, Fast Food, Biryani, Desserts, Beverages",100
2,https://www.zomato.com/hyderabad/subbaiah-gari...,Subbaiah Gari Hotel,4.1,"South Indian, Andhra, Mithai",100
3,https://www.zomato.com/hyderabad/paradise-biry...,Paradise Biryani,3.9,"Biryani, Kebab, Desserts, Beverages",100
4,https://www.zomato.com/hyderabad/pista-house-b...,Pista House Bakery,4.3,"Fast Food, Sandwich, Pizza, Burger, Wraps, Rol...",100


##### Building Recommendation System

In [20]:
# Prompt user for restaurant name and price range
restaurant_name = input('Enter the name of a restaurant: ').lower()  # Convert input to lowercase
min_price = float(input("Enter the minimum price you'd like to pay: "))
max_price = float(input("Enter the maximum price you'd like to pay: "))

# Check if a similar restaurant name exists in the DataFrame (case-insensitive)
similar_restaurant_names = data['names'].str.lower()  # Convert restaurant names in DataFrame to lowercase
if restaurant_name not in similar_restaurant_names.values:
    print("Sorry, the restaurant you entered is not found in the dataset.")
else:
    # Find the index of the input restaurant name in the DataFrame
    restaurant_index = similar_restaurant_names[similar_restaurant_names == restaurant_name].index[0]

    # Calculate the similarity scores between the input restaurant and all other restaurants
    similarity_scores = similarity[restaurant_index]

    # Sort the similarity scores and get the indices of the most similar restaurants
    most_similar_indices = np.argsort(similarity_scores)[::-1]

    # Filter the most similar restaurants based on the price range
    recommended_restaurants = []
    for index in most_similar_indices:
        restaurant_price = data.loc[index, 'price for one']
        if min_price <= restaurant_price <= max_price:
            recommended_restaurants.append(data.loc[index, 'names'])
            if len(recommended_restaurants) == 5:  # Stop after 5 recommendations
                break

    # Print the recommended restaurants
    if recommended_restaurants:
        print("Recommended restaurants based on your preferences:")
        for i, restaurant in enumerate(recommended_restaurants, start=1):
            print(f"{i}. {restaurant}, Price: {min_price}")
    else:
        print("No restaurants found matching your preferences.")


Enter the name of a restaurant: Paradise Biryani
Enter the minimum price you'd like to pay: 100
Enter the maximum price you'd like to pay: 100
Recommended restaurants based on your preferences:
1. Paradise Biryani, Price: 100.0
2. Paradise Bakery - By Paradise Biryani, Price: 100.0
3. Makhni Brothers, Price: 100.0
4. Hotel Parwaz, Price: 100.0
5. Royal Biryani & Kababs, Price: 100.0
