In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
reviews=pd.read_csv("reviews.csv")
data=reviews.copy()
reviews.head()
reviews.tail()

Unnamed: 0,Index,Name,Overall_Rating,Cuisine,Rate for two,City,Review
827,827,Humming bird cafe,3.9,"Cafe, Pizza, Continental, Italian, Bakery, Bev...",1100,hyderabad,undercooked food
828,828,Roast 26,3.9,"Cafe, Pizza, Continental, Italian, Bakery, Bev...",1400,hyderabad,"Yup, that's worth INR 410. Unbelievable less q..."
829,829,Olive Bistro,4.2,"Cafe, Shake, Beverages, Waffle, Bakery, Desser...",1400,hyderabad,Not even lit hot
830,830,Hoppery,4.1,"Cafe, Coffee, Shake, Beverages, Sandwich, Past...",1200,hyderabad,"Cappuccino tasted really bad, had a burnt tast..."
831,831,Elite bistro,4.2,"Cafe, Coffee, Shake, Beverages, Sandwich, Past...",1400,hyderabad,"For a coffee place, the coffee definitely fall..."


In [4]:
#data cleansing
reviews.duplicated().sum()
reviews.drop_duplicates(inplace=True)

reviews.isnull().sum()
reviews.dropna(how='any', inplace=True)
reviews = reviews.rename(columns={'Rate for two':'cost','Overall_Rating':'rating'})
reviews['City'].describe

<bound method NDFrame.describe of 0      ahmedabad
1      ahmedabad
2      ahmedabad
3      ahmedabad
4      ahmedabad
         ...    
827    hyderabad
828    hyderabad
829    hyderabad
830    hyderabad
831    hyderabad
Name: City, Length: 832, dtype: object>

In [5]:
reviews['cost'] = pd.to_numeric(reviews['cost'], errors='coerce')
default_value = 0  # You can choose a different default value
reviews['cost'].fillna(default_value, inplace=True)
reviews['cost'] = reviews['cost'].astype(str) #Changing the cost to string
reviews['cost'] = reviews['cost'].apply(lambda x: x.replace(',','')) #Using lambda function to replace ',' from cost
reviews['cost']=reviews['cost'].astype(float)
reviews['rating'] = pd.to_numeric(reviews['rating'], errors='coerce')

# Drop rows with missing values (NaN)
reviews = reviews.dropna()

In [6]:
scaler = MinMaxScaler()
reviews[['rating', 'cost']] = scaler.fit_transform(reviews[['rating', 'cost']])


In [7]:
feature_columns = ['rating', 'cost']
vectors = reviews[feature_columns].values

In [8]:
cosine_sim_mat=cosine_similarity(vectors,vectors)

In [9]:
target_cafe_index = 0
similar_cafes_indices = sorted(range(len(cosine_sim_mat[target_cafe_index])), key=lambda k: cosine_sim_mat[target_cafe_index][k], reverse=True)[1:]
recommended_cafes = reviews.iloc[similar_cafes_indices]

In [10]:
print(recommended_cafes[['Name', 'rating', 'cost']])

                  Name    rating      cost
1         Oliver Brown  0.583333  0.357143
41   Indie Brewed Cafe  0.583333  0.357143
42   Indie Brewed Cafe  0.583333  0.357143
43   Indie Brewed Cafe  0.583333  0.357143
63         Cafe Selfie  0.583333  0.357143
..                 ...       ...       ...
522         Cafe Diona  0.041667  0.678571
397        Narcos Cafe  0.000000  0.357143
398        Narcos Cafe  0.000000  0.357143
399        Narcos Cafe  0.000000  0.357143
400        Narcos Cafe  0.000000  0.357143

[714 rows x 3 columns]


In [11]:
recommended_cafes

Unnamed: 0,Index,Name,rating,Cuisine,cost,City,Review
1,1,Oliver Brown,0.583333,"Cafe, Coffee, Shake, Juices, Beverages, Waffle...",0.357143,ahmedabad,I recently visited Oliver Brown on a weekend f...
41,41,Indie Brewed Cafe,0.583333,"Cafe, Coffee, Shake, Beverages, Fast Food",0.357143,ahmedabad,Very bad iced AmericanoDon’t order from here
42,42,Indie Brewed Cafe,0.583333,"Cafe, Coffee, Shake, Beverages, Fast Food",0.357143,ahmedabad,Ordered from here thrice. I love what they off...
43,43,Indie Brewed Cafe,0.583333,"Cafe, Coffee, Shake, Beverages, Fast Food",0.357143,ahmedabad,There is no ice in peach iced tea in fact it i...
63,63,Cafe Selfie,0.583333,"Cafe, Beverages, Shake, Fast Food",0.357143,ahmedabad,vary small
...,...,...,...,...,...,...,...
522,522,Cafe Diona,0.041667,"Cafe, North Indian, Chinese, Continental, Fast...",0.678571,jaipur,First they charge you so much for a couple ent...
397,397,Narcos Cafe,0.000000,"Cafe, Fast Food, Coffee, Beverages",0.357143,bangalore,"If you want to waste time and money, go here! ..."
398,398,Narcos Cafe,0.000000,"Cafe, Fast Food, Coffee, Beverages",0.357143,bangalore,"This review is for the one at ITC factory, my ..."
399,399,Narcos Cafe,0.000000,"Cafe, Fast Food, Coffee, Beverages",0.357143,bangalore,They claim to be open for breakfast at 9am. We...


In [12]:
new_dataset=recommended_cafes[['Name','cost','rating']].copy()
new_dataset.duplicated().sum()
new_dataset

Unnamed: 0,Name,cost,rating
1,Oliver Brown,0.357143,0.583333
41,Indie Brewed Cafe,0.357143,0.583333
42,Indie Brewed Cafe,0.357143,0.583333
43,Indie Brewed Cafe,0.357143,0.583333
63,Cafe Selfie,0.357143,0.583333
...,...,...,...
522,Cafe Diona,0.678571,0.041667
397,Narcos Cafe,0.357143,0.000000
398,Narcos Cafe,0.357143,0.000000
399,Narcos Cafe,0.357143,0.000000


In [13]:
new_dataset.drop_duplicates(inplace=True)

In [14]:
new_dataset

Unnamed: 0,Name,cost,rating
1,Oliver Brown,0.357143,0.583333
41,Indie Brewed Cafe,0.357143,0.583333
63,Cafe Selfie,0.357143,0.583333
68,Busy Beans,0.357143,0.583333
149,Hustle Snooker Cafe Bistro,0.357143,0.583333
...,...,...,...
134,Back To Basics,0.500000,0.125000
215,Cafe Cremyday,0.357143,0.083333
394,Cafe Centralis,0.678571,0.125000
521,Cafe Diona,0.678571,0.041667


In [15]:
merged_df = pd.merge(new_dataset, data, left_index=True, right_index=True)
merged_df.drop(columns=['Name_y'], inplace=True)

In [16]:
merged_df.rename(columns={'Name_x':'Name'},inplace=True)
merged_df

Unnamed: 0,Name,cost,rating,Index,Overall_Rating,Cuisine,Rate for two,City,Review
1,Oliver Brown,0.357143,0.583333,1,3.9,"Cafe, Coffee, Shake, Juices, Beverages, Waffle...",500,ahmedabad,I recently visited Oliver Brown on a weekend f...
41,Indie Brewed Cafe,0.357143,0.583333,41,3.9,"Cafe, Coffee, Shake, Beverages, Fast Food",500,ahmedabad,Very bad iced AmericanoDon’t order from here
63,Cafe Selfie,0.357143,0.583333,63,3.9,"Cafe, Beverages, Shake, Fast Food",500,ahmedabad,vary small
68,Busy Beans,0.357143,0.583333,68,3.9,"Cafe, Beverages, Fast Food",500,ahmedabad,Pure coffee beans test and delicious...Good q...
149,Hustle Snooker Cafe Bistro,0.357143,0.583333,149,3.9,"Cafe, Coffee, Sandwich, Pizza, Burger, Fast Fo...",500,pune,the pizza was cold and not in good shape and w...
...,...,...,...,...,...,...,...,...,...
134,Back To Basics,0.500000,0.125000,134,2.8,"Cafe, Coffee, Beverages, Sandwich, Fast Food, ...",700,pune,food food food at it's best! ambience at it's ...
215,Cafe Cremyday,0.357143,0.083333,215,2.7,"Cafe, Fast Food, Desserts, Beverages, Street F...",500,pune,My friend visited this outlet yesterday to eat...
394,Cafe Centralis,0.678571,0.125000,394,2.8,"Cafe, Chinese, Italian, Sichuan",950,bangalore,One of the best place to visit they serve one ...
521,Cafe Diona,0.678571,0.041667,521,2.6,"Cafe, North Indian, Chinese, Continental, Fast...",950,jaipur,The staff is very rude. They do not have hotel...


In [17]:
final_data=merged_df[['Name','Overall_Rating','Cuisine','Rate for two','City']].copy()

In [18]:
final_data.duplicated().sum()
final_data.isnull().sum()

Name              0
Overall_Rating    0
Cuisine           0
Rate for two      0
City              0
dtype: int64

In [19]:
final_data.drop_duplicates(inplace=True)
final_data

Unnamed: 0,Name,Overall_Rating,Cuisine,Rate for two,City
1,Oliver Brown,3.9,"Cafe, Coffee, Shake, Juices, Beverages, Waffle...",500,ahmedabad
41,Indie Brewed Cafe,3.9,"Cafe, Coffee, Shake, Beverages, Fast Food",500,ahmedabad
63,Cafe Selfie,3.9,"Cafe, Beverages, Shake, Fast Food",500,ahmedabad
68,Busy Beans,3.9,"Cafe, Beverages, Fast Food",500,ahmedabad
149,Hustle Snooker Cafe Bistro,3.9,"Cafe, Coffee, Sandwich, Pizza, Burger, Fast Fo...",500,pune
...,...,...,...,...,...
134,Back To Basics,2.8,"Cafe, Coffee, Beverages, Sandwich, Fast Food, ...",700,pune
215,Cafe Cremyday,2.7,"Cafe, Fast Food, Desserts, Beverages, Street F...",500,pune
394,Cafe Centralis,2.8,"Cafe, Chinese, Italian, Sichuan",950,bangalore
521,Cafe Diona,2.6,"Cafe, North Indian, Chinese, Continental, Fast...",950,jaipur


In [20]:
def search_city(city_name):
    cafes_in_city = final_data[final_data['City'].str.lower() == city_name.lower()]
    return cafes_in_city

In [21]:
cafes_in_city=search_city('jaipur')
cafes_in_city


Unnamed: 0,Name,Overall_Rating,Cuisine,Rate for two,City
482,Jashn,3.9,"Cafe, Coffee, Fast Food, Street Food, North In...",500,jaipur
546,Malgudi Cafe,4.0,"Cafe, Italian, Chinese, Fast Food, Beverages, ...",500,jaipur
530,Viona Baristro,3.6,"Cafe, Street Food, Chinese, Pizza, Fast Food, ...",350,jaipur
444,Terra Gourmet,4.2,"Cafe, Coffee, Continental, Fast Food, Pasta, L...",700,jaipur
447,Roastery Coffee House,4.5,"Cafe, Coffee, Pizza, Burger, Pasta, Italian, C...",850,jaipur
457,Brown Sugar,4.2,"Cafe, Coffee, Bakery, Desserts, Pizza, Contine...",500,jaipur
442,The Waffle Co.,3.9,"Waffle, Desserts, Pancake, Coffee, Cafe, Shake...",600,jaipur
557,Lloyd's House,3.9,"Cafe, Bakery, Beverages",400,jaipur
435,Barista Coffee,4.1,"Cafe, Coffee, Desserts",700,jaipur
455,Bombaye House,4.1,"Cafe, Chinese, Continental, South Indian, Stre...",700,jaipur


In [22]:
final_data['Rate for two']=final_data['Rate for two'].astype(str)
def cost_cafes(min_price,max_price):
    final_data['Rate for two'] = final_data['Rate for two'].str.replace(',', '').astype(float)
    cafes_in_price_range = final_data[
        (final_data['Rate for two'] >= min_price) & 
        (final_data['Rate for two'] <= max_price)
    ]
    return cafes_in_price_range

In [23]:
cafes_in_price_range=cost_cafes(500,1200)


In [24]:
cafes_in_price_range

Unnamed: 0,Name,Overall_Rating,Cuisine,Rate for two,City
1,Oliver Brown,3.9,"Cafe, Coffee, Shake, Juices, Beverages, Waffle...",500.0,ahmedabad
41,Indie Brewed Cafe,3.9,"Cafe, Coffee, Shake, Beverages, Fast Food",500.0,ahmedabad
63,Cafe Selfie,3.9,"Cafe, Beverages, Shake, Fast Food",500.0,ahmedabad
68,Busy Beans,3.9,"Cafe, Beverages, Fast Food",500.0,ahmedabad
149,Hustle Snooker Cafe Bistro,3.9,"Cafe, Coffee, Sandwich, Pizza, Burger, Fast Fo...",500.0,pune
...,...,...,...,...,...
134,Back To Basics,2.8,"Cafe, Coffee, Beverages, Sandwich, Fast Food, ...",700.0,pune
215,Cafe Cremyday,2.7,"Cafe, Fast Food, Desserts, Beverages, Street F...",500.0,pune
394,Cafe Centralis,2.8,"Cafe, Chinese, Italian, Sichuan",950.0,bangalore
521,Cafe Diona,2.6,"Cafe, North Indian, Chinese, Continental, Fast...",950.0,jaipur


In [25]:
common_data = cafes_in_city[cafes_in_city.index.isin(cafes_in_price_range.index)]

In [26]:
common_data

Unnamed: 0,Name,Overall_Rating,Cuisine,Rate for two,City
482,Jashn,3.9,"Cafe, Coffee, Fast Food, Street Food, North In...",500,jaipur
546,Malgudi Cafe,4.0,"Cafe, Italian, Chinese, Fast Food, Beverages, ...",500,jaipur
444,Terra Gourmet,4.2,"Cafe, Coffee, Continental, Fast Food, Pasta, L...",700,jaipur
447,Roastery Coffee House,4.5,"Cafe, Coffee, Pizza, Burger, Pasta, Italian, C...",850,jaipur
457,Brown Sugar,4.2,"Cafe, Coffee, Bakery, Desserts, Pizza, Contine...",500,jaipur
442,The Waffle Co.,3.9,"Waffle, Desserts, Pancake, Coffee, Cafe, Shake...",600,jaipur
435,Barista Coffee,4.1,"Cafe, Coffee, Desserts",700,jaipur
455,Bombaye House,4.1,"Cafe, Chinese, Continental, South Indian, Stre...",700,jaipur
488,Starstud Cafe,4.0,"Cafe, Fast Food, Burger, Sandwich, Street Food...",700,jaipur
526,Loca By La Multigrain,4.0,"Cafe, Fast Food, Desserts, Beverages",700,jaipur


In [45]:
import pickle

In [46]:
pickle.dump(common_data.to_dict(),open('cafe_dict.pkl','wb'))

In [30]:
cafe_dict

NameError: name 'cafe_dict' is not defined