# Yelp Recommendation System for Housing Project

### Setting up the environment for analysis

In [None]:
#Installing the Necessary packages
!sudo apt install gdal-bin python-gdal python3-gdal 
!sudo apt install python3-rtree 
!pip install git+git://github.com/geopandas/geopandas.git
!pip install descartes 
!pip install folium 
!pip install plotly_express

In [1]:
#Importing the packages
import pandas as pd 
import numpy as np
import json
import folium
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

### Exploratory Analysis

In [2]:
# Load the Yelp business table from CSV and preview its first rows
business_csv = 'yelp_business.csv'
df = pd.read_csv(business_csv)
df.head()

Unnamed: 0,business_id,name,neighborhood,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,categories
0,FYWN1wneV18bWNgQjJ2GNg,"""Dental by Design""",,"""4855 E Warner Rd, Ste B9""",Ahwatukee,AZ,85044,33.33069,-111.978599,4.0,22,1,Dentists;General Dentistry;Health & Medical;Or...
1,He-G7vWjzVUysIKrfNbPUQ,"""Stephen Szabo Salon""",,"""3101 Washington Rd""",McMurray,PA,15317,40.291685,-80.1049,3.0,11,1,Hair Stylists;Hair Salons;Men's Hair Salons;Bl...
2,KQPW8lFf1y5BT2MxiSZ3QA,"""Western Motor Vehicle""",,"""6025 N 27th Ave, Ste 1""",Phoenix,AZ,85017,33.524903,-112.11531,1.5,18,1,Departments of Motor Vehicles;Public Services ...
3,8DShNS-LuFqpEWIp0HxijA,"""Sports Authority""",,"""5000 Arizona Mills Cr, Ste 435""",Tempe,AZ,85282,33.383147,-111.964725,3.0,9,0,Sporting Goods;Shopping
4,PfOCPjBrlQAnz__NXj9h_w,"""Brick House Tavern + Tap""",,"""581 Howe Ave""",Cuyahoga Falls,OH,44221,41.119535,-81.47569,3.5,116,1,American (New);Nightlife;Bars;Sandwiches;Ameri...


In [3]:

# (rows, columns) of the business table read from the CSV
df.shape

(174567, 13)

In [4]:
# Flag businesses whose category string mentions 'Restaurants'.
# na=False keeps the column strictly boolean when `categories` is missing;
# regex=False treats the pattern as plain text.
df['Restaurants'] = df['categories'].str.contains('Restaurants', na=False, regex=False)


In [5]:
# Keep only the rows flagged as restaurants and preview them
is_restaurant = df['Restaurants'] == True
df_restaurants = df.loc[is_restaurant]
df_restaurants.head()

Unnamed: 0,business_id,name,neighborhood,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,categories,Restaurants
4,PfOCPjBrlQAnz__NXj9h_w,"""Brick House Tavern + Tap""",,"""581 Howe Ave""",Cuyahoga Falls,OH,44221,41.119535,-81.47569,3.5,116,1,American (New);Nightlife;Bars;Sandwiches;Ameri...,True
5,o9eMRCWt5PkpLDE0gOPtcQ,"""Messina""",,"""Richterstr. 11""",Stuttgart,BW,70567,48.7272,9.14795,4.0,5,1,Italian;Restaurants,True
10,XOSRcvtaKc_Q5H1SAzN20A,"""East Coast Coffee""",,"""737 West Pike St""",Houston,PA,15342,40.241548,-80.212815,4.5,3,0,Breakfast & Brunch;Gluten-Free;Coffee & Tea;Fo...,True
14,fNMVV_ZX7CJSDWQGdOM8Nw,"""Showmars Government Center""",Uptown,"""600 E 4th St""",Charlotte,NC,28202,35.221647,-80.839345,3.5,7,1,Restaurants;American (Traditional),True
15,l09JfMeQ6ynYs5MCJtrcmQ,"""Alize Catering""",Yonge and Eglinton,"""2459 Yonge St""",Toronto,ON,M4P 2H6,43.711399,-79.399339,3.0,12,0,Italian;French;Restaurants,True


In [6]:
# (rows, columns) after keeping only the restaurant rows
df_restaurants.shape

(54618, 14)

**Sorting the Restaurants based on review count and Stars**

In [20]:
# Rank by review count, then stars (both descending), and keep the first 1000 rows
top_restaurants = (
    df_restaurants
    .sort_values(by=['review_count', 'stars'], ascending=False)
    .head(1000)
)
top_restaurants.head()

Unnamed: 0,business_id,name,neighborhood,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,categories,Restaurants
97944,4JNXUYY8wbaaDmk3BPzlWw,"""Mon Ami Gabi""",The Strip,"""3655 Las Vegas Blvd S""",Las Vegas,NV,89109,36.112827,-115.172581,4.0,7361,1,French;Steakhouses;Restaurants;Breakfast & Brunch,True
119907,RESDUcs7fIiihp38-d6_6g,"""Bacchanal Buffet""",The Strip,"""3570 S Las Vegas Blvd""",Las Vegas,NV,89109,36.116113,-115.176222,4.0,7009,1,Sandwiches;Buffets;Restaurants;Breakfast & Bru...,True
69993,K7lWdNUhCbcnEvI0NhGewg,"""Wicked Spoon""",The Strip,"""3708 Las Vegas Blvd S""",Las Vegas,NV,89109,36.109538,-115.17617,3.5,5950,1,Buffets;Breakfast & Brunch;Restaurants,True
81212,cYwJA2A6I12KNkm2rtXd5g,"""Gordon Ramsay BurGR""",The Strip,"""3667 Las Vegas Blvd S""",Las Vegas,NV,89109,36.110724,-115.172169,4.0,5447,0,American (Traditional);Burgers;Restaurants,True
139699,DkYS3arLOhA8si5uUEmHOw,"""Earl of Sandwich""",The Strip,"""3667 Las Vegas Blvd S""",Las Vegas,NV,89109,36.108228,-115.171869,4.5,4869,1,Caterers;Sandwiches;Restaurants;Food Delivery ...,True


**Clustering the restaurants based on geolocation (K-Means inertia for a range of k)**

In [21]:
# Fit K-Means on the (longitude, latitude) pairs for k = 1..99 and record each
# model's inertia so the values can be compared across k.
coords = top_restaurants[['longitude','latitude']]
K = range(1,100)
distortions = []
for k in K:
    # Fixed seed: without it every run produces a different inertia curve.
    kmeansModel = KMeans(n_clusters=k, random_state=42).fit(coords)
    distortions.append(kmeansModel.inertia_)


**Checking clustering quality for each k with the silhouette score**

In [22]:
from sklearn.metrics import silhouette_score

# Silhouette score of a K-Means fit for every k from 2 to kmax (inclusive).
sil = []
kmax = 50

# dissimilarity would not be defined for a single cluster, thus, minimum number of clusters should be 2
for k in range(2, kmax+1):
    # Fixed seed so the scores are reproducible from run to run.
    kmeans = KMeans(n_clusters=k, random_state=42).fit(coords)
    labels = kmeans.labels_
    sil.append(silhouette_score(coords, labels, metric='euclidean'))

In [23]:
# Silhouette scores in order of k; position i holds the score for k = i + 2
sil

[0.9280106074771384,
 0.9549353006689872,
 0.9241468512709209,
 0.9410237244104829,
 0.9494674213690556,
 0.9611599180508569,
 0.973228489090511,
 0.9752910524926574,
 0.8103502151194059,
 0.8215885175108103,
 0.57065009753802,
 0.6087281226710416,
 0.6123992507899004,
 0.6369912152290201,
 0.5524801629814051,
 0.5932136113941352,
 0.5753631256553565,
 0.5969900487645492,
 0.5823279457264768,
 0.5873245966433382,
 0.5815486783822276,
 0.5864701717780808,
 0.5671254897200971,
 0.5641533838727175,
 0.5700177979125347,
 0.5777714490318743,
 0.5624071144525077,
 0.568939307843564,
 0.5539308176939606,
 0.5651948551908165,
 0.5734727362463077,
 0.5533930686887076,
 0.5732193425556136,
 0.5101455318217397,
 0.5704017180937863,
 0.5101663214907106,
 0.5345417274136737,
 0.5170955918090501,
 0.5172408979037577,
 0.5008545146031212,
 0.5062447630276455,
 0.5075198332715339,
 0.5213249623561714,
 0.4965582146525742,
 0.5024941766638498,
 0.5189983138070283,
 0.5135652573842,
 0.554427555923691,


**Fitting K-Means model on the data**

In [24]:
# Final model: 5 clusters over the (longitude, latitude) pairs.
# random_state pins the initialisation so the fit, and the cluster ids used by
# the recommendation cells, are the same on every run.
kmeans = KMeans(n_clusters=5, init='k-means++', random_state=42)
kmeans.fit(coords)
# Fitted estimator attributes carry a trailing underscore: `labels_`, not `labels`.
y = kmeans.labels_
print("k = 5", " silhouette_score ", silhouette_score(coords, y, metric='euclidean'))

k = 5  silhouette_score  0.9410237244104829


**Prediction based on location**

In [25]:

# Label each top restaurant with the cluster the fitted model predicts for its coordinates
geo_columns = ['longitude','latitude']
top_restaurants['cluster'] = kmeans.predict(top_restaurants[geo_columns])
top_restaurants.head()

Unnamed: 0,business_id,name,neighborhood,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,categories,Restaurants,cluster
97944,4JNXUYY8wbaaDmk3BPzlWw,"""Mon Ami Gabi""",The Strip,"""3655 Las Vegas Blvd S""",Las Vegas,NV,89109,36.112827,-115.172581,4.0,7361,1,French;Steakhouses;Restaurants;Breakfast & Brunch,True,1
119907,RESDUcs7fIiihp38-d6_6g,"""Bacchanal Buffet""",The Strip,"""3570 S Las Vegas Blvd""",Las Vegas,NV,89109,36.116113,-115.176222,4.0,7009,1,Sandwiches;Buffets;Restaurants;Breakfast & Bru...,True,1
69993,K7lWdNUhCbcnEvI0NhGewg,"""Wicked Spoon""",The Strip,"""3708 Las Vegas Blvd S""",Las Vegas,NV,89109,36.109538,-115.17617,3.5,5950,1,Buffets;Breakfast & Brunch;Restaurants,True,1
81212,cYwJA2A6I12KNkm2rtXd5g,"""Gordon Ramsay BurGR""",The Strip,"""3667 Las Vegas Blvd S""",Las Vegas,NV,89109,36.110724,-115.172169,4.0,5447,0,American (Traditional);Burgers;Restaurants,True,1
139699,DkYS3arLOhA8si5uUEmHOw,"""Earl of Sandwich""",The Strip,"""3667 Las Vegas Blvd S""",Las Vegas,NV,89109,36.108228,-115.171869,4.5,4869,1,Caterers;Sandwiches;Restaurants;Food Delivery ...,True,1


In [38]:
# Order the clustered restaurants by review count, then stars, both descending
ordering_keys = ['review_count', 'stars']
top_restaurants_order = top_restaurants.sort_values(by=ordering_keys, ascending=False)
top_restaurants_order.head()

Unnamed: 0,business_id,name,neighborhood,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,categories,Restaurants,cluster
97944,4JNXUYY8wbaaDmk3BPzlWw,"""Mon Ami Gabi""",The Strip,"""3655 Las Vegas Blvd S""",Las Vegas,NV,89109,36.112827,-115.172581,4.0,7361,1,French;Steakhouses;Restaurants;Breakfast & Brunch,True,1
119907,RESDUcs7fIiihp38-d6_6g,"""Bacchanal Buffet""",The Strip,"""3570 S Las Vegas Blvd""",Las Vegas,NV,89109,36.116113,-115.176222,4.0,7009,1,Sandwiches;Buffets;Restaurants;Breakfast & Bru...,True,1
69993,K7lWdNUhCbcnEvI0NhGewg,"""Wicked Spoon""",The Strip,"""3708 Las Vegas Blvd S""",Las Vegas,NV,89109,36.109538,-115.17617,3.5,5950,1,Buffets;Breakfast & Brunch;Restaurants,True,1
81212,cYwJA2A6I12KNkm2rtXd5g,"""Gordon Ramsay BurGR""",The Strip,"""3667 Las Vegas Blvd S""",Las Vegas,NV,89109,36.110724,-115.172169,4.0,5447,0,American (Traditional);Burgers;Restaurants,True,1
139699,DkYS3arLOhA8si5uUEmHOw,"""Earl of Sandwich""",The Strip,"""3667 Las Vegas Blvd S""",Las Vegas,NV,89109,36.108228,-115.171869,4.5,4869,1,Caterers;Sandwiches;Restaurants;Food Delivery ...,True,1


**Function for recommendation based on geo location**

In [39]:
def recommend_restaurants(df, longitude, latitude, top_n=30, model=None):
    """Return the leading rows of ``df`` that fall in the same cluster as a location.

    Parameters
    ----------
    df : pandas.DataFrame
        Restaurants with a ``cluster`` column plus ``name``, ``city``,
        ``stars``, ``latitude``, ``longitude``, ``address`` and ``categories``.
        Rows are returned in the order they appear in ``df``, so sort it
        beforehand to control which restaurants come first.
    longitude, latitude : float
        Query location. Longitude comes first, matching the column order
        handed to the model.
    top_n : int, default 30
        Maximum number of rows to return.
    model : object with a ``predict`` method, optional
        Fitted clustering model. Defaults to the notebook-level ``kmeans``.

    Returns
    -------
    pandas.DataFrame
        At most ``top_n`` rows of ``df`` from the predicted cluster, limited
        to the seven descriptive columns listed above.
    """
    if model is None:
        model = kmeans

    # Query with named columns in the same order as the frame the notebook fits
    # on, rather than a bare array.
    query = pd.DataFrame([[longitude, latitude]], columns=['longitude', 'latitude'])
    cluster = model.predict(query)[0]
    print(cluster)

    wanted_columns = ['name','city','stars', 'latitude','longitude','address','categories']
    return df.loc[df['cluster'] == cluster, wanted_columns].head(top_n)

**Connecting to Mongo DB**

In [46]:
import os
import pymongo
from pymongo import MongoClient

# Connection target can be overridden through MONGO_HOST / MONGO_PORT so the
# notebook is not tied to a single server; the defaults keep the original values.
MONGO_HOST = os.environ.get("MONGO_HOST", "34.83.108.19")
MONGO_PORT = int(os.environ.get("MONGO_PORT", "27017"))
client = MongoClient(MONGO_HOST, MONGO_PORT)
db = client['gradHousing']


**Recommendation for North Carolina**

In [40]:
# recommend_restaurants takes longitude before latitude; pass both by keyword
# so the coordinates cannot be transposed.
north_carolina = recommend_restaurants(top_restaurants, longitude=-80.827953, latitude=35.152725)
north_carolina_list = north_carolina.values.tolist()
north_carolina


3


Unnamed: 0,name,city,stars,latitude,longitude,address,categories
144620,"""The Cowfish Sushi Burger Bar""",Charlotte,4.0,35.152725,-80.827953,"""4310 Sharon Rd, Ste X05""",Restaurants;Asian Fusion;Burgers;Sushi Bars
115558,"""Amélie's French Bakery & Café""",Charlotte,4.0,35.241145,-80.812057,"""2424 N Davidson St, Ste 102""",Restaurants;Food;Patisserie/Cake Shop;Coffee &...
73231,"""Midwood Smokehouse""",Charlotte,4.5,35.221004,-80.814875,"""1401 Central Ave""",Smokehouse;Pizza;Food;Restaurants;Barbeque;Ame...
92689,"""Tupelo Honey""",Charlotte,4.0,35.211178,-80.858604,"""1820 South Blvd""",Breakfast & Brunch;Southern;Restaurants;Americ...
43961,"""Cabo Fish Taco""",Charlotte,4.0,35.247251,-80.805827,"""3201 N Davidson St""",Latin American;Mexican;Restaurants;Seafood
13417,"""Soul Gastrolounge""",Charlotte,4.0,35.220214,-80.813368,"""1500-B Central Ave""",Nightlife;Restaurants;Tapas Bars;Lounges;Bars
173858,"""Pinky's Westside Grill""",Charlotte,4.0,35.229054,-80.86737,"""1600 W Morehead St""",American (Traditional);Restaurants;Vegetarian;...
10315,"""Mert's Heart & Soul""",Charlotte,3.5,35.227318,-80.84013,"""214 N College St""",Southern;Soul Food;Restaurants;Breakfast & Brunch
134627,"""Viva Chicken""",Charlotte,4.5,35.213421,-80.826022,"""1617 Elizabeth Ave""",Peruvian;Restaurants
122612,"""Futo Buta""",Charlotte,4.0,35.215379,-80.855443,"""222 Bland St""",Asian Fusion;Ramen;Japanese;Restaurants


**Recommendation for Illinois**

In [31]:
# Illinois recommendations come from a direct state filter on df_restaurants:
# the first 30 'IL' rows, then ordered by stars (descending).
# The list is derived from the same frame that is displayed, so the stored
# rows and the table shown always agree.
# NOTE(review): this takes the first 30 rows before sorting, not the 30
# best-rated ones — confirm that is intended.
illinois_columns = ['name','city','stars', 'latitude','longitude','address','categories']
illinois = (
    df_restaurants[df_restaurants.state == 'IL']
    .iloc[0:30][illinois_columns]
    .sort_values(by=['stars'], ascending=False)
)
illinois_list = illinois.values.tolist()
illinois



Unnamed: 0,name,city,stars,latitude,longitude,address,categories
1991,"""JP Cool's Bar & Grill""",Mansfield,5.0,40.211684,-88.505562,"""14 N Jefferson St""",Food;Nightlife;Restaurants;Beer;Wine & Spirits...
9722,"""Pies by Inge""",Monticello,5.0,40.025702,-88.573161,"""212 W Washington St""",Desserts;Cupcakes;Restaurants;Bakeries;Food
7397,"""Arby's""",Champaign,4.5,40.13114,-88.257562,"""1502 N Prospect Ave""",Fast Food;Sandwiches;Restaurants
8897,"""C & C Kitchen""",Rantoul,4.5,40.310654,-88.158583,"""107 E Sangamon Ave""",Coffee & Tea;Food;Restaurants;Sandwiches;Break...
4853,"""Proud Marys'""",Tuscola,4.5,39.791242,-88.273007,"""1003 E Southline Rd""",Bars;Restaurants;Nightlife;American (Traditional)
190,"""Zorba's Restaurant""",Champaign,4.0,40.110085,-88.229304,"""627 E Green St""",Restaurants;Greek;Salad;Mediterranean
1341,"""I Wok""",Savoy,4.0,40.059012,-88.252238,"""113 Calvin St""",Restaurants;Chinese
8427,"""El Toro""",Urbana,3.5,40.123643,-88.200627,"""1104 N Cunningham Ave""",Restaurants;Mexican
6352,"""Silver Mine Subs""",Champaign,3.5,40.108174,-88.229693,"""612 E Daniel St""",Restaurants;Sandwiches
5752,"""Big Grove Tavern""",Champaign,3.5,40.118381,-88.242996,"""1 E Main St, Ste 101""",Bars;Breakfast & Brunch;Restaurants;Pubs;Gastr...


**Recommendation for Arizona**

In [41]:
# Look up the cluster for the Arizona query point and keep the rows as plain lists
arizona = recommend_restaurants(top_restaurants, longitude=-111.928001, latitude=32.424564)
arizona_list = arizona.values.tolist()
arizona

2


Unnamed: 0,name,city,stars,latitude,longitude,address,categories
148820,"""Pizzeria Bianco""",Phoenix,4.0,33.44916,-112.065635,"""623 E Adams St""",Restaurants;Salad;Pizza;Italian;Sandwiches
152492,"""Four Peaks Brewing""",Tempe,4.5,33.419568,-111.916097,"""1340 E 8th St, Ste 104""",American (New);Sandwiches;Burgers;Nightlife;Ba...
100969,"""Bobby Q""",Phoenix,4.5,33.561085,-112.115747,"""8501 N 27th Ave""",Caterers;American (New);Restaurants;Event Plan...
115632,"""Lux Central""",Phoenix,4.5,33.500597,-112.074237,"""4400 N Central Ave""",Breakfast & Brunch;American (New);Bakeries;Res...
130242,"""Rehab Burger Therapy""",Scottsdale,4.5,33.491503,-111.925627,"""7210 E 2nd St""",Restaurants;Sandwiches;Bars;Nightlife;Burgers;...
171047,"""Cibo""",Phoenix,4.5,33.45496,-112.079908,"""603 N 5th Ave""",Sandwiches;Pizza;Restaurants;Italian
831,"""La Santisima""",Phoenix,4.0,33.469201,-112.047393,"""1919 N 16th St""",Vegetarian;Nightlife;Mexican;Bars;Vegan;Restau...
41036,"""The Mission Old Town""",Scottsdale,4.0,33.492287,-111.924273,"""3815 N Brown Ave""",Restaurants;Bars;Nightlife;Mexican;Latin American
163111,"""Joe's Farm Grill""",Gilbert,4.0,33.321824,-111.726057,"""3000 E Ray Rd, Bldg 1""",Italian;American (Traditional);Pizza;Restauran...
157852,"""Citizen Public House""",Scottsdale,4.5,33.49821,-111.927963,"""7111 E 5th Ave, Ste E""",Salad;Cocktail Bars;Bars;Gastropubs;American (...


**Recommendation for Colorado**

In [33]:
# NOTE(review): these coordinates equal the first row of the saved output
# ("Main Street, Carrigaline", categories starting with Irish) — confirm this
# is the location intended for "Colorado".
colorado = recommend_restaurants(top_restaurants, longitude=-8.392635, latitude=51.812438)
# Build the list from this cell's own result, not from the Arizona frame.
colorado_list = colorado.values.tolist()
colorado


Unnamed: 0,name,city,stars,latitude,longitude,address,categories
898,"""The Abbey Bar & Restaurant""",Main Street,3.0,51.812438,-8.392635,"""Main Street, Carrigaline""",Irish;Bars;Nightlife;Restaurants
6307,"""Sage Restaurant""",Main Street,4.5,51.916151,-8.174059,"""No 8 Main Street, Midleton""",Irish;Restaurants


**Recommendation for LasVegas**

In [34]:
lasvegas = recommend_restaurants(top_restaurants, longitude=-115.287451, latitude=36.159483)
# Build the list from this cell's own result, not from the Arizona frame.
lasvegas_list = lasvegas.values.tolist()
lasvegas


Unnamed: 0,name,city,stars,latitude,longitude,address,categories
538,"""Baja Fresh""",Las Vegas,5.0,36.159483,-115.287451,"""8780 W Charleston Blvd, Ste 100""",Restaurants;Mexican
778,"""The Hummus Factory""",Las Vegas,4.5,36.143204,-115.262866,"""7875 W Sahara Ave, Ste 101""",Event Planning & Services;Caterers;Burgers;Gre...
721,"""The Blacc Boxx""",North Las Vegas,4.5,36.196983,-115.180376,"""3011 W Lake Mead Blvd""",Sandwiches;Bakeries;Food;Chicken Wings;Cafes;R...
691,"""Mexicali Raspados""",Las Vegas,4.5,36.100688,-115.101083,"""4865 S Pecos Rd""",Restaurants;Ice Cream & Frozen Yogurt;Shaved I...
177,"""Geebee's Bar & Grill""",Las Vegas,4.5,36.034244,-115.171409,"""8560 Las Vegas Blvd S""",Restaurants;American (Traditional)
274,"""Trattoria Italia""",Las Vegas,4.5,36.010086,-115.118656,"""9905 S Eastern Ave, Ste 140""",Seafood;Italian;Pizza;Restaurants
303,"""Cancun Bar & Grill""",Las Vegas,4.5,36.098632,-115.136079,"""5006 S Maryland Pkwy, Ste 17""",Karaoke;Bars;Mexican;Restaurants;Nightlife;Dan...
366,"""Kinthai""",Las Vegas,4.5,36.144023,-115.195896,"""4105 W Sahara Ave""",Restaurants;Thai
52,"""Flight Deck Bar & Grill""",Las Vegas,4.0,36.066914,-115.170848,"""6730 S Las Vegas Blvd""",Nightlife;Bars;Barbeque;Sports Bars;American (...
428,"""Rise & Shine - A Steak & Egg Place""",Henderson,4.0,36.024323,-115.062857,"""75 S Valle Verde Dr""",Breakfast & Brunch;Restaurants


**Pushing the data to the database**

In [None]:
# Store every North Carolina recommendation as one document in `recommendations`
articles = db.recommendations
for row in north_carolina_list:
    # Row layout: name, city, stars, latitude, longitude, address, categories
    article = {
        "State": "North Carolina",
        "Name": row[0],
        "City": row[1],
        "Rating": row[2],
        "address": row[5],
        "category": row[6],
        # Point coordinates are stored as [longitude, latitude]
        "loc": {"type": "Point", "coordinates": [row[4], row[3]]},
    }
    result = articles.insert_one(article)

In [42]:
# Store every Las Vegas recommendation as one document in `recommendations`
articles = db.recommendations
for row in lasvegas_list:
    # Row layout: name, city, stars, latitude, longitude, address, categories
    article = {
        "State": "LasVegas",
        "Name": row[0],
        "City": row[1],
        "Rating": row[2],
        "address": row[5],
        "category": row[6],
        # Point coordinates are stored as [longitude, latitude]
        "loc": {"type": "Point", "coordinates": [row[4], row[3]]},
    }
    result = articles.insert_one(article)

In [43]:
# Store every Arizona recommendation as one document in `recommendations`
articles = db.recommendations
for row in arizona_list:
    # Row layout: name, city, stars, latitude, longitude, address, categories
    article = {
        "State": "Arizona",
        "Name": row[0],
        "City": row[1],
        "Rating": row[2],
        "address": row[5],
        "category": row[6],
        # Point coordinates are stored as [longitude, latitude]
        "loc": {"type": "Point", "coordinates": [row[4], row[3]]},
    }
    result = articles.insert_one(article)

In [44]:
# Store every Illinois recommendation as one document in `recommendations`
articles = db.recommendations
for row in illinois_list:
    # Row layout: name, city, stars, latitude, longitude, address, categories
    article = {
        "State": "Illinois",
        "Name": row[0],
        "City": row[1],
        "Rating": row[2],
        "address": row[5],
        "category": row[6],
        # Point coordinates are stored as [longitude, latitude]
        "loc": {"type": "Point", "coordinates": [row[4], row[3]]},
    }
    result = articles.insert_one(article)

In [45]:
# Store every Colorado recommendation as one document in `recommendations`
articles = db.recommendations
for row in colorado_list:
    # Row layout: name, city, stars, latitude, longitude, address, categories
    article = {
        "State": "Colorado",
        "Name": row[0],
        "City": row[1],
        "Rating": row[2],
        "address": row[5],
        "category": row[6],
        # Point coordinates are stored as [longitude, latitude]
        "loc": {"type": "Point", "coordinates": [row[4], row[3]]},
    }
    result = articles.insert_one(article)

**Validation of Recommendation**

In [1]:
# NOTE(review): everything in this cell is disabled. The commented lines build
# the Illinois, Las Vegas and Colorado frames/lists from a plain `state` filter
# on df_restaurants instead of calling recommend_restaurants.
# illinois_list=[]
# illinois_list=illinois.values.tolist()
# illinois

# lasvegas = df_restaurants[df_restaurants.state == 'NV'].iloc[0:30][['name','city','stars', 'latitude','longitude','address','categories']].sort_values(by=[ 'stars'], ascending=False)

# lasvegas_list=[]
# lasvegas_list=lasvegas.values.tolist()
# lasvegas



# colorado = df_restaurants[df_restaurants.state == 'CO'].iloc[0:2][['name','city','stars', 'latitude','longitude','address','categories']]
# colorado_list=[]
# colorado_list=colorado.values.tolist()
# colorado