# K-Means clustering on topic weights to find similar restaurants

## Import necessary libraries 

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

## Read the restaurant dataset with topic weights

In [2]:
# Load the restaurant/topic-weight table and discard the 'Unnamed: 0' column
# (presumably an index saved along with the CSV -- confirm against the export step).
pos_restaurant_topic_file = '../Datasets/Positive_Restaurant_Topics.csv'
df_pos_restaurant_topic = pd.read_csv(pos_restaurant_topic_file).drop(columns=['Unnamed: 0'])

In [3]:
# Preview the first three rows of the loaded frame.
df_pos_restaurant_topic.head(n=3)

Unnamed: 0,business_id,name,categories,city,stars,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6
0,-2-ih3mE8KPyeKVIzpBfPQ,SkyGarten,"American (Traditional), Bars, Restaurants, Nig...",Philadelphia,3.5,0.169421,0.0,0.0,0.014014,0.144462,0.672103
1,-T_lkOvaK39R-Ufg6VUyxg,Magpie,"Desserts, Food, Coffee & Tea, Restaurants",Philadelphia,4.5,0.49906,0.01698,0.184143,0.145282,0.098522,0.056013
2,-V0vIgo6196MDn_x3ZaYmA,La Creperie Cafe,"Restaurants, Cafes, Creperies",Philadelphia,3.5,0.224358,0.164145,0.037759,0.227137,0.272205,0.074397


In [4]:
# Presentation view: restaurant identifiers plus the six topic-weight columns.
df_pos_clus_present = df_pos_restaurant_topic.loc[
    :, ['business_id', 'name'] + ['topic_{}'.format(i) for i in range(1, 7)]
]
df_pos_clus_present.head(n=3)

Unnamed: 0,business_id,name,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6
0,-2-ih3mE8KPyeKVIzpBfPQ,SkyGarten,0.169421,0.0,0.0,0.014014,0.144462,0.672103
1,-T_lkOvaK39R-Ufg6VUyxg,Magpie,0.49906,0.01698,0.184143,0.145282,0.098522,0.056013
2,-V0vIgo6196MDn_x3ZaYmA,La Creperie Cafe,0.224358,0.164145,0.037759,0.227137,0.272205,0.074397


In [5]:
# Feature matrix handed to the clustering step: the six topic-weight columns only.
df_pos_res_clus = df_pos_restaurant_topic.loc[
    :, ['topic_{}'.format(i) for i in range(1, 7)]
]
df_pos_res_clus.head(n=3)

Unnamed: 0,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6
0,0.169421,0.0,0.0,0.014014,0.144462,0.672103
1,0.49906,0.01698,0.184143,0.145282,0.098522,0.056013
2,0.224358,0.164145,0.037759,0.227137,0.272205,0.074397


## Apply K-Means clustering method

In [6]:
# Fit K-Means on the topic-weight features and label every restaurant with its cluster.
N_CLUSTERS = 6
RANDOM_STATE = 42  # fixed seed so cluster ids stay the same across kernel restarts

# n_init is pinned explicitly because its default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=RANDOM_STATE).fit(df_pos_res_clus)
centroids = kmeans.cluster_centers_  # one row per cluster, one column per topic feature
clusters = kmeans.labels_  # cluster index of each fitted row; no second pass over the data

# Plain column assignment appends 'cluster' on the first run and overwrites it on
# re-runs, rather than inserting a duplicate column at a hard-coded position.
df_pos_restaurant_topic['cluster'] = clusters
print(centroids)


[[0.55596898 0.04922904 0.0260262  0.12907204 0.17674524 0.06295849]
 [0.16465728 0.05222994 0.02581891 0.13129194 0.18773514 0.43826678]
 [0.31634248 0.05816    0.02531432 0.10608559 0.40340218 0.09069543]
 [0.11748204 0.04677676 0.61740166 0.06872338 0.09539869 0.05421746]
 [0.25793112 0.07227905 0.03143323 0.37023517 0.18751899 0.08060245]
 [0.18807932 0.46582814 0.04305456 0.13915307 0.11026957 0.05361534]]


In [7]:
# Preview the first three rows again; the frame now carries the 'cluster' column.
df_pos_restaurant_topic.head(n=3)

Unnamed: 0,business_id,name,categories,city,stars,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,cluster
0,-2-ih3mE8KPyeKVIzpBfPQ,SkyGarten,"American (Traditional), Bars, Restaurants, Nig...",Philadelphia,3.5,0.169421,0.0,0.0,0.014014,0.144462,0.672103,1
1,-T_lkOvaK39R-Ufg6VUyxg,Magpie,"Desserts, Food, Coffee & Tea, Restaurants",Philadelphia,4.5,0.49906,0.01698,0.184143,0.145282,0.098522,0.056013,0
2,-V0vIgo6196MDn_x3ZaYmA,La Creperie Cafe,"Restaurants, Cafes, Creperies",Philadelphia,3.5,0.224358,0.164145,0.037759,0.227137,0.272205,0.074397,4


In [8]:
# Split the labelled frame into one sub-frame per cluster id (0 through 5).
(
    df_pos_restaurant_topic_c0,
    df_pos_restaurant_topic_c1,
    df_pos_restaurant_topic_c2,
    df_pos_restaurant_topic_c3,
    df_pos_restaurant_topic_c4,
    df_pos_restaurant_topic_c5,
) = (
    df_pos_restaurant_topic[df_pos_restaurant_topic['cluster'] == cluster_id]
    for cluster_id in range(6)
)

## Restaurants similar to 'The Pizza Place'

Function: `retrieve_similar_res`
<br>
Parameter: restaurant name
<br>
Output: prints the restaurants that fall in the same cluster as the given restaurant

In [9]:
def retrieve_similar_res(res_name, df=None):
    """Print every restaurant that shares a K-Means cluster with ``res_name``.

    Parameters
    ----------
    res_name : str
        Exact restaurant name, matched against the ``name`` column.
    df : pandas.DataFrame, optional
        Frame with ``name``, ``categories`` and ``cluster`` columns. Defaults to
        the notebook-level ``df_pos_restaurant_topic``.

    Returns
    -------
    None
        The listing is printed, not returned.

    Raises
    ------
    KeyError
        If no row of ``df`` has ``name == res_name``.

    Notes
    -----
    When several rows share ``res_name``, the cluster of the first one is used
    and every row with that name is left out of the listing.
    """
    if df is None:
        df = df_pos_restaurant_topic

    is_target = df['name'] == res_name
    if not is_target.any():
        # Same exception type as the positional lookup on an empty selection
        # would raise, but with a message that names the missing restaurant.
        raise KeyError("No restaurant named '{}' in the dataset".format(res_name))
    res_cluster = df.loc[is_target, 'cluster'].iloc[0]

    df_result = df[(df['cluster'] == res_cluster) & ~is_target]

    print("Restaurant Similar to '{}': ".format(res_name))
    print('='*50)
    # Iterate the two columns in lockstep; no index reset or per-row lookup needed.
    for name, categories in zip(df_result['name'], df_result['categories']):
        print('Restaurant: {} \ncategories: {:50}'.format(name, categories))
        print('-'*50)


In [10]:
# List the restaurants clustered together with 'The Pizza Place'.
retrieve_similar_res(res_name="The Pizza Place")

Restaurant Similar to 'The Pizza Place': 
Restaurant: Francoluigi's Pizzeria & Italian Restaurant 
categories: Italian, Restaurants, Pizza                       
--------------------------------------------------
Restaurant: Lazos Pizza & Grill 
categories: Pizza, American (Traditional), Restaurants, Italian
--------------------------------------------------
Restaurant: Bufad 
categories: Desserts, Event Planning & Services, Caterers, Vegetarian, Pizza, Restaurants, Food, Italian, Venues & Event Spaces
--------------------------------------------------
Restaurant: Kosmo Pizza & Grille 
categories: Restaurants, Pizza, Sandwiches, American (Traditional), Food Delivery Services, Food
--------------------------------------------------
Restaurant: Robert Chiarella's Gourmet Pizzeria 
categories: Pizza, Restaurants, Sandwiches, Italian           
--------------------------------------------------
Restaurant: The Crocodile 
categories: Nightlife, Arts & Entertainment, Food, Pizza, Restaurants