# K-Means clustering by topic weights to find similar restaurants

## Import necessary libraries 

In [15]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

## Read the restaurant business dataset with topic weights

In [16]:
# Load the per-restaurant topic weights and discard the 'Unnamed: 0' column
# (presumably the index that was written out when the CSV was saved).
pos_restaurant_topic_file = '../Datasets/Positive_Restaurant_Topics.csv'
df_pos_restaurant_topic = (
    pd.read_csv(pos_restaurant_topic_file)
    .drop(columns=['Unnamed: 0'])
)

In [17]:
# Preview the first three rows of the loaded frame.
df_pos_restaurant_topic.head(3)

Unnamed: 0,business_id,name,categories,city,stars,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6
0,-2-ih3mE8KPyeKVIzpBfPQ,SkyGarten,"American (Traditional), Bars, Restaurants, Nig...",Philadelphia,3.5,0.170043,0.0,0.0,0.014064,0.145168,0.670726
1,-T_lkOvaK39R-Ufg6VUyxg,Magpie,"Desserts, Food, Coffee & Tea, Restaurants",Philadelphia,4.5,0.49908,0.016986,0.18418,0.145433,0.098592,0.055729
2,-V0vIgo6196MDn_x3ZaYmA,La Creperie Cafe,"Restaurants, Cafes, Creperies",Philadelphia,3.5,0.224335,0.164131,0.03775,0.227311,0.27245,0.074022


In [18]:
# Identifier columns plus the six topic weights, kept side by side for display.
df_pos_clus_present = df_pos_restaurant_topic.loc[
    :,
    [
        'business_id', 'name',
        'topic_1', 'topic_2', 'topic_3', 'topic_4', 'topic_5', 'topic_6',
    ],
]
df_pos_clus_present.head(3)

Unnamed: 0,business_id,name,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6
0,-2-ih3mE8KPyeKVIzpBfPQ,SkyGarten,0.170043,0.0,0.0,0.014064,0.145168,0.670726
1,-T_lkOvaK39R-Ufg6VUyxg,Magpie,0.49908,0.016986,0.18418,0.145433,0.098592,0.055729
2,-V0vIgo6196MDn_x3ZaYmA,La Creperie Cafe,0.224335,0.164131,0.03775,0.227311,0.27245,0.074022


In [19]:
# Feature matrix for clustering: only the six topic-weight columns.
df_pos_res_clus = df_pos_restaurant_topic.loc[
    :,
    ['topic_1', 'topic_2', 'topic_3', 'topic_4', 'topic_5', 'topic_6'],
]
df_pos_res_clus.head(3)

Unnamed: 0,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6
0,0.170043,0.0,0.0,0.014064,0.145168,0.670726
1,0.49908,0.016986,0.18418,0.145433,0.098592,0.055729
2,0.224335,0.164131,0.03775,0.227311,0.27245,0.074022


## Apply K-Means clustering method

In [20]:
# Fit K-Means on the six topic-weight columns.
# random_state is fixed so cluster numbering and centroids are reproducible
# across "Restart & Run All"; n_init is pinned explicitly because its default
# differs between scikit-learn releases.
kmeans = KMeans(n_clusters=6, n_init=10, random_state=42).fit(df_pos_res_clus)
centroids = kmeans.cluster_centers_  # one row per cluster, one column per topic
clusters = kmeans.predict(df_pos_res_clus)  # cluster label for every restaurant

# Plain column assignment instead of DataFrame.insert(): insert() raises
# ValueError when the column already exists, so the cell could not be re-run.
# Assignment appends 'cluster' as the last column on first run and overwrites
# it on later runs.
df_pos_restaurant_topic['cluster'] = clusters
print(centroids)


[[0.55596943 0.04925348 0.02603008 0.12916122 0.1769055  0.06268029]
 [0.3163212  0.05819385 0.02531286 0.10615383 0.40370722 0.09031103]
 [0.18804789 0.46598931 0.0430542  0.13922931 0.1103424  0.05333689]
 [0.11747272 0.04679387 0.61751602 0.06879234 0.09546997 0.05395508]
 [0.25790211 0.07230473 0.0314339  0.37047974 0.18766266 0.08021685]
 [0.16490808 0.05234165 0.02587004 0.13162187 0.18819902 0.43705933]]


In [21]:
# Preview the frame again to confirm the 'cluster' column is now present.
df_pos_restaurant_topic.head(3)

Unnamed: 0,business_id,name,categories,city,stars,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,cluster
0,-2-ih3mE8KPyeKVIzpBfPQ,SkyGarten,"American (Traditional), Bars, Restaurants, Nig...",Philadelphia,3.5,0.170043,0.0,0.0,0.014064,0.145168,0.670726,5
1,-T_lkOvaK39R-Ufg6VUyxg,Magpie,"Desserts, Food, Coffee & Tea, Restaurants",Philadelphia,4.5,0.49908,0.016986,0.18418,0.145433,0.098592,0.055729,0
2,-V0vIgo6196MDn_x3ZaYmA,La Creperie Cafe,"Restaurants, Cafes, Creperies",Philadelphia,3.5,0.224335,0.164131,0.03775,0.227311,0.27245,0.074022,4


In [22]:
# Split the restaurants into one frame per K-Means cluster label (0-5).
cluster_labels = df_pos_restaurant_topic['cluster']
df_pos_restaurant_topic_c0 = df_pos_restaurant_topic.loc[cluster_labels.eq(0)]
df_pos_restaurant_topic_c1 = df_pos_restaurant_topic.loc[cluster_labels.eq(1)]
df_pos_restaurant_topic_c2 = df_pos_restaurant_topic.loc[cluster_labels.eq(2)]
df_pos_restaurant_topic_c3 = df_pos_restaurant_topic.loc[cluster_labels.eq(3)]
df_pos_restaurant_topic_c4 = df_pos_restaurant_topic.loc[cluster_labels.eq(4)]
df_pos_restaurant_topic_c5 = df_pos_restaurant_topic.loc[cluster_labels.eq(5)]

## Restaurants similar to a given restaurant (e.g. 'Horizons')

Function: `retrieve_similar_res`
<br>
Parameter: restaurant name
<br>
Output: prints the list of similar restaurants (those in the same cluster)

In [23]:
def retrieve_similar_res(res_name):
    """Print every restaurant that shares a cluster with ``res_name``.

    Reads the module-level ``df_pos_restaurant_topic`` frame, which must
    contain the columns ``name``, ``categories`` and ``cluster``.

    Parameters
    ----------
    res_name : str
        Exact restaurant name to look up in the ``name`` column.

    Returns
    -------
    None
        The result is printed: a header line, then the name and categories of
        each restaurant in the same cluster (rows named ``res_name`` are
        excluded).

    Raises
    ------
    KeyError
        If no row has ``name == res_name``.
    """
    name_matches = df_pos_restaurant_topic['name'] == res_name
    if not name_matches.any():
        # Previously this surfaced as an opaque "KeyError: 0" from the
        # positional lookup; keep the exception type but explain the cause.
        raise KeyError(
            "No restaurant named '{}' found in df_pos_restaurant_topic".format(res_name)
        )

    # When several rows share the name, the first row's cluster is used.
    res_cluster = df_pos_restaurant_topic.loc[name_matches, 'cluster'].iloc[0]
    same_cluster = df_pos_restaurant_topic['cluster'] == res_cluster
    df_result = df_pos_restaurant_topic.loc[same_cluster & ~name_matches]

    print("Restaurant Similar to '{}': ".format(res_name))
    print('='*50)
    for similar_name, categories in zip(df_result['name'], df_result['categories']):
        print('Restaurant: {} \ncategories: {:50}'.format(similar_name, categories))
        print('-'*50)


In [24]:
# Example: print the restaurants that fall in the same cluster as "Horizons".
retrieve_similar_res("Horizons")

Restaurant Similar to 'Horizons': 
Restaurant: Magpie 
categories: Desserts, Food, Coffee & Tea, Restaurants         
--------------------------------------------------
Restaurant: Le Chéri 
categories: Restaurants, French                               
--------------------------------------------------
Restaurant: DaMò Pasta Lab 
categories: Italian, Pasta Shops, Restaurants, Food, Specialty Food
--------------------------------------------------
Restaurant: Oishii Poké 
categories: Restaurants, Mexican, Food, Hawaiian, Poke, Japanese
--------------------------------------------------
Restaurant: Il Pittore 
categories: Restaurants, American (New), Diners, Italian      
--------------------------------------------------
Restaurant: Wajoe 
categories: Korean, Restaurants                               
--------------------------------------------------
Restaurant: Urban Enoteca 
categories: Nightlife, Bars, Restaurants, American (New), Italian
-------------------------------------------