# _Restaurant Recommendation System_
## Prepare import

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import warnings
warnings.filterwarnings("ignore")

## Load Data
### GoogleReview
#### View Sample, Check Null Data For Review

In [5]:
ratings_data = pd.read_csv('GoogleReview_data_cleaned.csv')
ratings_data = ratings_data[['Author', 'Rating', 'Review', 'Restaurant', 'Location']]
print("Review data:", ratings_data.shape)
print("\nCheck NULL values in Reviews:\n-----------------------------")
print(ratings_data.isnull().sum())
print("-----------------------------")
ratings_data.head()

Review data: (222020, 5)

Check NULL values in Reviews:
-----------------------------
Author        0
Rating        0
Review        0
Restaurant    0
Location      0
dtype: int64
-----------------------------


Unnamed: 0,Author,Rating,Review,Restaurant,Location
0,Jia Pin Lee,4.0,Came here for the High Tea. Great service espe...,Cuisines Restaurant,Ipoh
1,Chui Yi Lum,2.0,"5 stars for the service, even though some of t...",Cuisines Restaurant,Ipoh
2,liezel wong,1.0,"Hi, thank you for your service. But! i feel so...",Cuisines Restaurant,Ipoh
3,Nazri Nor,1.0,I have the worse buffer dinner ever so far. Th...,Cuisines Restaurant,Ipoh
4,Fakru Imran's Channel,5.0,"That's are Known 5 Elmark "" 9H72 "" & KDK "" 3 K...",Cuisines Restaurant,Ipoh


In [6]:
df_data2 = ratings_data.copy()
df_data2.head()

Unnamed: 0,Author,Rating,Review,Restaurant,Location
0,Jia Pin Lee,4.0,Came here for the High Tea. Great service espe...,Cuisines Restaurant,Ipoh
1,Chui Yi Lum,2.0,"5 stars for the service, even though some of t...",Cuisines Restaurant,Ipoh
2,liezel wong,1.0,"Hi, thank you for your service. But! i feel so...",Cuisines Restaurant,Ipoh
3,Nazri Nor,1.0,I have the worse buffer dinner ever so far. Th...,Cuisines Restaurant,Ipoh
4,Fakru Imran's Channel,5.0,"That's are Known 5 Elmark "" 9H72 "" & KDK "" 3 K...",Cuisines Restaurant,Ipoh


# Collaborative Filtering Recommendation System

## ITEM-BASED COLLABORATIVE FILTERING


In [7]:
# Building User-Item Interactions Matrix
author_restaurant_matrix = df_data2.pivot_table(index = 'Author', columns = ['Restaurant'], values = 'Rating').fillna(0)
author_restaurant_matrix.head()

Restaurant,'D' Selera Kelate,16th St. Cafe,1919 Restaurant Ipoh,20 Chulia Lane Cafe,21 Bistro,218 Hainan Lor Mee,27@cove,28 Food Centre,3 :15 Auntie Hong's Cooking,33 Blue Room,...,寒舍 HANSHE @Perling,心安素食斋料食馆 Xin An Vegetarian Cafe,怡保古早味猪肠粉 Ipoh Traditional Style Chee Cheong Fun,我家餐館Our Kitchen Nyonya Restaurant,海皇粿条仔Restaurant Hi Wan,田園粥火锅 Farmland Porridge Steamboat,相聚火锅 The Gather BBQ Steamboat(新犀鸟阁 New Hornbill BBQ Steamboat）,越南小廚 V NAM KITCHEN,青山角 / Green Hill Corner,食得福美食中心Cedar Point Food Centre
Author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
# cikgusally,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#Ativ Mindworks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#GJBlane RICE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#JL_King_Of_Music,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#MyNameIsMuna #MUNALICIOUS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### a. Identifying Author Who Rated a Given Restaurant

In [8]:
def get_Author(restaurantName):
    author_ratings = author_restaurant_matrix[restaurantName]
    return author_ratings

### a. User Interaction Sessions 

Sample Data:
- 我家餐館Our Kitchen Nyonya Restaurant
- 1919 Restaurant Ipoh
- 心安素食斋料食馆 Xin An Vegetarian Cafe

In [10]:
print("Welcome Sir/Madam!\nThis session is about Knowing Who Rated A Given Restaurant")
print("----------------------------------------------------------")
print("NOTE: Restaurant NAME must be in full form")

restaurantName = input("Please provide a restaurant name: ")

Welcome Sir/Madam!
This session is about Knowing Who Rated A Given Restaurant
----------------------------------------------------------
NOTE: Restaurant NAME must be in full form
Please provide a restaurant name: 1919 Restaurant Ipoh


In [11]:
if restaurantName in list(df_data2["Restaurant"]):
    print("Restaurant Name: " + restaurantName)
    print("=================================================================")
    print("Here's the OTHER Author(s) Who Rated This Restaurant:-")
    print("=================================================================")
else: 
    print("No Restaurants Found...")

Restaurant Name: 1919 Restaurant Ipoh
Here's the OTHER Author(s) Who Rated This Restaurant:-


In [12]:
# Display author listing
get_Author(restaurantName).head(10)

Author
# cikgusally                  0.0
#Ativ Mindworks               0.0
#GJBlane RICE                 0.0
#JL_King_Of_Music             0.0
#MyNameIsMuna #MUNALICIOUS    0.0
#Ramen #Mee                   0.0
#SamuelLabo                   0.0
#SuPeR# (SuPerPauL)           0.0
#ryuki2517                    0.0
#바나나스프                        0.0
Name: 1919 Restaurant Ipoh, dtype: float64

#### b. KNN Item-based

In [13]:
# Define the index for Restaurant Name For Later Recommendation
restaurant_author_matrix = df_data2.pivot_table(index = 'Restaurant', columns = ['Author'], values = 'Rating').fillna(0)
restaurant_author_matrix.head(50)

Author,# cikgusally,#Ativ Mindworks,#GJBlane RICE,#JL_King_Of_Music,#MyNameIsMuna #MUNALICIOUS,#Ramen #Mee,#SamuelLabo,#SuPeR# (SuPerPauL),#ryuki2517,#바나나스프,...,홍성훈,홍우진,화성인,황명설,황무환,효니의 관심사Honey’s interest,흰호랑이,ﹰﹰChoon Woo,Ｎａｋａ Ｍｕｒａ,ｊ ｍ
Restaurant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'D' Selera Kelate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16th St. Cafe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1919 Restaurant Ipoh,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20 Chulia Lane Cafe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21 Bistro,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
218 Hainan Lor Mee,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
27@cove,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28 Food Centre,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3 :15 Auntie Hong's Cooking,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
33 Blue Room,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Define Compressed Sparse Row (CSR)
# Non-zero elements are stored in compressed row format, meaning that each row is represented by a list of the non-zero elements
# in that row, along with their corresponding column indices.
from scipy.sparse import csr_matrix

csr_matrix_rating = csr_matrix(restaurant_author_matrix)
csr_matrix_rating

<1322x125087 sparse matrix of type '<class 'numpy.float64'>'
	with 214264 stored elements in Compressed Sparse Row format>

KNN (K-Nearest Neighbors) is a machine learning algorithm that can be used for classification or regression tasks. It finds the k-nearest neighbors of a data point based on a distance metric, and then uses those neighbors to make a prediction.

When working with large datasets, it is often the case that the majority of the data is sparse, meaning that most of the values are zero. In this case, it can be more efficient to store the data in a sparse matrix format such as csr_matrix.

Using csr_matrix with KNN can help to reduce the memory usage and computational time required for finding the nearest neighbors. This is because the csr_matrix format only stores the non-zero elements and their locations, which can greatly reduce the size of the data in memory.

In [15]:
# Define K-Nearest Neighbour (KNN)
# Unsupervised machine learning algorithm used for finding the k-nearest neighbors of a given data point in a dataset. 
# It is commonly used for recommendation systems, image recognition, and anomaly detection.
from sklearn.neighbors import NearestNeighbors

KNN_Model = NearestNeighbors(metric = 'cosine', algorithm = 'brute')
KNN_Model.fit(csr_matrix_rating)

NearestNeighbors(algorithm='brute', metric='cosine')

In [16]:
def recommend(restaurant_name, k_value):
    distances, indices = KNN_Model.kneighbors(restaurant_author_matrix[restaurant_author_matrix.index == restaurant_name].values.reshape(1,-1), n_neighbors = int(k_value))
    for i in range(0, len(distances.flatten())):
        if i == 0:
            print("K-Nearest Neighbouring (KNN) Restaurant Recommendations :")
            print("=======================================================================================================")
            print("Recommendations Based On Restaurant Name: " + restaurant_name)
            print("=======================================================================================================")
        else:
            name = author_restaurant_matrix.index[indices.flatten()[i]]
            distance_points = distances.flatten()[i]
            print(f"{i}: {name} \t\t\t\t, with distance of {distance_points}:")
    
    # Exclude one from the k neighbours to get the number of the rest restaurants
    noOfRestaurants = int(k_value) - 1
    print("=======================================================================================================")
    print("                               You've Suggested To View " + str(noOfRestaurants) + " of Restaurants")
    print("                =======================================================================")

### User Interactions Section


Sample data:
- AQUA Restaurant & Bar
- Ah Hua Kuey Teow
- Ah Chui Seafood
- Anderson Curry House
- Amy Heritage Nyonya Cuisine

In [17]:
print("Welcome Sir/Madam!\nThis session is about Restaurant Recommendation Based on K-Nearest Neighbouring (KNN)")
print("-------------------------------------------------------------------------------------")
print("NOTE: Restaurant NAME must be in full form")
restaurant_name = input("Please enter the restaurant name: ")

print("\nNOTE: Please provide in NUMBER")
k_value = input("Please enter the number of restaurants (K-Value) for us to recommend to you: ")

Welcome Sir/Madam!
This session is about Restaurant Recommendation Based on K-Nearest Neighbouring (KNN)
-------------------------------------------------------------------------------------
NOTE: Restaurant NAME must be in full form
Please enter the restaurant name: Ah Hua Kuey Teow

NOTE: Please provide in NUMBER
Please enter the number of restaurants (K-Value) for us to recommend to you: 10


In [18]:
recommend(restaurant_name, k_value)

K-Nearest Neighbouring (KNN) Restaurant Recommendations :
Recommendations Based On Restaurant Name: Ah Hua Kuey Teow
1: AA R 				, with distance of 0.9627364214006987:
2: Abdul Rahman Morni 				, with distance of 0.9685277537324877:
3: 02424ytc 				, with distance of 0.9711038072853727:
4: ALI NAFE 				, with distance of 0.9716451475932806:
5: Aaron Chun 				, with distance of 0.9744841441454131:
6: AFM 3939 				, with distance of 0.9749323819887327:
7: Aamir Kapasi 				, with distance of 0.9781527320860403:
8: ANIQ 				, with distance of 0.9782873114428285:
9: ABDUL AZIZ 				, with distance of 0.9795534111139069:
                               You've Suggested To View 9 of Restaurants


#### c. Correlation Between A Selected Restaurant and All Other Restaurants

Sample Data:
- 我家餐館Our Kitchen Nyonya Restaurant
- 1919 Restaurant Ipoh
- 心安素食斋料食馆 Xin An Vegetarian Cafe

In [20]:
print("Welcome Sir/Madam!\nThis session is about Most Similar Restaurant Recommendation")
print("------------------------------------------------------------")
print("NOTE: Restaurant NAME must be in full form")

restaurant_name = input("Please provide a restaurant name: ")

Welcome Sir/Madam!
This session is about Most Similar Restaurant Recommendation
------------------------------------------------------------
NOTE: Restaurant NAME must be in full form
Please provide a restaurant name: 1919 Restaurant Ipoh


In [21]:
# Use get_Author Function Defined in A Section
author_ratings = get_Author(restaurant_name)
author_ratings.head(10)

Author
# cikgusally                  0.0
#Ativ Mindworks               0.0
#GJBlane RICE                 0.0
#JL_King_Of_Music             0.0
#MyNameIsMuna #MUNALICIOUS    0.0
#Ramen #Mee                   0.0
#SamuelLabo                   0.0
#SuPeR# (SuPerPauL)           0.0
#ryuki2517                    0.0
#바나나스프                        0.0
Name: 1919 Restaurant Ipoh, dtype: float64

In [78]:
similar_restaurants = author_restaurant_matrix.corrwith(author_ratings)

# Create a dataframe with similar restaurants as the index column and name another column as correlation
similar_restaurants = pd.DataFrame(similar_restaurants, columns = ['Correlation'])
print("==========================================================================================")
print("💠Correlation-Based Recommendation Based on Restaurant: " + restaurant_name)
print("==========================================================================================")

similar_restaurants.head(10)

💠Correlation-Based Recommendation Based on Restaurant: 1919 Restaurant Ipoh


Unnamed: 0_level_0,Correlation
Restaurant,Unnamed: 1_level_1
'D' Selera Kelate,-0.000291
16th St. Cafe,-0.00105
1919 Restaurant Ipoh,1.0
20 Chulia Lane Cafe,-0.000721
21 Bistro,-9.2e-05
218 Hainan Lor Mee,-0.000783
27@cove,-0.000666
28 Food Centre,-0.001043
3 :15 Auntie Hong's Cooking,-0.000359
33 Blue Room,-0.001566


### Recommending The Most Similar Restaurants

In [79]:
print("==========================================================================================")
print("💠Correlation-Based Recommendation Based on Restaurant: " + restaurant_name)
print("==========================================================================================")

similar_restaurants.sort_values(by = 'Correlation', ascending = False).head(10)

💠Correlation-Based Recommendation Based on Restaurant: 1919 Restaurant Ipoh


Unnamed: 0_level_0,Correlation
Restaurant,Unnamed: 1_level_1
1919 Restaurant Ipoh,1.0
The Deck Gastrobar,0.038099
STG Tea House Cafe,0.036571
Restaurant Sun Yeong Wai (Medan Ipoh Branch),0.034055
Padi House Greentown Ipoh,0.03378
Yum Yum Restaurant Ipoh,0.033415
Citrus Wine & Dine,0.031838
STG Ipoh Oldtown,0.030419
A Jie Cafe,0.029676
Tandoor Grill,0.027766


For the restaurant name that is passed to the pivot table, the correlation between this restaurant and itself always 1; while other restaurants are correlated to it.

### Recommending The Most Similar Popular Restaurants
To determine the correlation strength of a restaurant's recommendation, it is necessary to consider the number of ratings, as a higher number of ratings is indicative of greater popularity.

In [80]:
df_rating = pd.DataFrame(df_data2.groupby('Restaurant')['Rating'].count())  
print("==========================================================================================")
print("💠Correlation-Based Recommendation Based on Restaurant: " + restaurant_name)
print("==========================================================================================")
df_rating.head(10)

💠Correlation-Based Recommendation Based on Restaurant: 1919 Restaurant Ipoh


Unnamed: 0_level_0,Rating
Restaurant,Unnamed: 1_level_1
'D' Selera Kelate,11
16th St. Cafe,134
1919 Restaurant Ipoh,142
20 Chulia Lane Cafe,62
21 Bistro,1
218 Hainan Lor Mee,75
27@cove,53
28 Food Centre,136
3 :15 Auntie Hong's Cooking,18
33 Blue Room,300


In [81]:
similar_popular_restaurants = similar_restaurants.join(df_rating['Rating'], on = 'Restaurant', how = 'left').sort_values(by = 'Correlation', ascending = False)
print("==========================================================================================")
print("💠Correlation-Based Recommendation Based on Restaurant: " + restaurant_name)
print("==========================================================================================")
similar_popular_restaurants.head(10)

💠Correlation-Based Recommendation Based on Restaurant: 1919 Restaurant Ipoh


Unnamed: 0_level_0,Correlation,Rating
Restaurant,Unnamed: 1_level_1,Unnamed: 2_level_1
1919 Restaurant Ipoh,1.0,142
The Deck Gastrobar,0.038099,23
STG Tea House Cafe,0.036571,384
Restaurant Sun Yeong Wai (Medan Ipoh Branch),0.034055,140
Padi House Greentown Ipoh,0.03378,322
Yum Yum Restaurant Ipoh,0.033415,254
Citrus Wine & Dine,0.031838,108
STG Ipoh Oldtown,0.030419,264
A Jie Cafe,0.029676,259
Tandoor Grill,0.027766,497


### Recommending Defined Number of Most Similar Popular Restaurants Based On User Input

In [87]:
def recommend_item_based(restaurant_Name, noOfRestaurants):
    if restaurant_Name in df_data2["Restaurant"].values:
        rating_count = pd.DataFrame(df_data2["Restaurant"].value_counts())
        
        # For restaurant overall number of rating count less than 100 is considered rare restaurant
        rare_restaurants = rating_count[rating_count["Restaurant"] <= 100].index
        
        # Get restaurant that is not in the set of rare restaurant
        common_restaurants = df_data2[~df_data2["Restaurant"].isin(rare_restaurants)]
            
        # For restaurant name provided that is in rare case    
        if restaurant_Name in rare_restaurants:
            most_common = pd.Series(common_restaurants["Restaurant"].unique()).sample(3).values
            print("💫 Emm, It Seems Like There's No Recommendations for this Restaurant: " + restaurant_Name + " ☹️☹️")
            print("==============================================================================================")
            print("BUT, YOU MAY TRY ON THE MOST COMMON RESTAURANTS LiSTED AS BELOW: \n ")
            print("\t1. {}".format(most_common[0]), "\n")
            print("\t2. {}".format(most_common[1]), "\n")
            print("\t3. {}".format(most_common[2]), "\n")
        else:
            # Define a new matrix for pivot table
            similar_restaurants_pivot = common_restaurants.pivot_table(index = "Author", columns = ["Restaurant"], values = "Rating").fillna(0)
            
            # Get the provided restaurant name inside the pivot
            author_ratings = similar_restaurants_pivot[restaurant_Name]
            
            # Apply Correlation Matrix and create with dataframe
            similar_restaurants = similar_restaurants_pivot.corrwith(author_ratings).sort_values(ascending = False)
            similar_restaurants = pd.DataFrame(similar_restaurants, columns = ['Correlation'])
            
            # Drop the same name for the recommendation based current restaurant name
            if restaurant_Name in similar_restaurants.index:
                similar_restaurants = similar_restaurants.drop(similar_restaurants[similar_restaurants.index == restaurant_Name].index[0])
            
            # Generate total number of rating count for each restarant
            df_rating = pd.DataFrame(df_data2.groupby('Restaurant')['Rating'].count())
            
            # Combine Correlation Table With Rating Count Table
            recommendation_df = similar_restaurants.join(df_rating['Rating'], on = 'Restaurant').sort_values(by = 'Correlation', ascending = False)[0:int(noOfRestaurants)]
                
            recommendation_df.rename(columns={"Correlation":"Correlation Value", "Rating":"Rating Count"}, inplace = True)
            
            print("==========================================================================================")
            print("💠Correlation-Based Recommendation Based on Restaurant: " + restaurant_Name)
            print("💠Total Number of Restaurants Suggested to you: " + noOfRestaurants)
            print("==========================================================================================")
            return recommendation_df
    else:
         print("❌  No Such Restaurants Found: " + restaurant_Name)

### User Interaction Sessions

Sample Recommended Data, rating count > 100
- 我家餐館Our Kitchen Nyonya Restaurant
- 1919 Restaurant Ipoh
- 心安素食斋料食馆 Xin An Vegetarian Cafe
- 越南小廚 V NAM KITCHEN

Sample No Recommended Data, rating count <= 100
- The Deck Gastrobar
- 'D' Selera Kelate
- 21 Bistro
- 27@cove

Sample No Found Data 
- Any of the value
- Hallo

In [85]:
print("Welcome Sir/Madam!\nThis session is about Most Similar Popular Restaurant Recommendation")
print("---------------------------------------------------------------------")
print("NOTE: Restaurant NAME must be in full form")
restaurant_name = input("Please provide a restaurant name: ")

print("\nNOTE: Please provide in NUMBER")
noOfRestaurants = input("Please enter the number of restaurants for us to recommend for you: ")

Welcome Sir/Madam!
This session is about Most Similar Popular Restaurant Recommendation
---------------------------------------------------------------------
NOTE: Restaurant NAME must be in full form
Please provide a restaurant name: 27@cove

NOTE: Please provide in NUMBER
Please enter the number of restaurants for us to recommend for you: 20


In [88]:
# Arrange By Highest Correlation Value
recommend_item_based(restaurant_name, noOfRestaurants)

💫 Emm, It Seems Like There's No Recommendations for this Restaurant: 27@cove ☹️☹️
BUT, YOU MAY TRY ON THE MOST COMMON RESTAURANTS LiSTED AS BELOW: 
 
	1. Enorme Italian Restaurant 

	2. Atok Kopitiam 

	3. Healy Mac's Irish Bar 



## User-based Collaborative Filtering 


In [30]:
author_restaurant_matrix = df_data2.pivot_table(index = 'Author', columns = ['Restaurant'], values = 'Rating').fillna(0)
author_restaurant_matrix.head()

Restaurant,'D' Selera Kelate,16th St. Cafe,1919 Restaurant Ipoh,20 Chulia Lane Cafe,21 Bistro,218 Hainan Lor Mee,27@cove,28 Food Centre,3 :15 Auntie Hong's Cooking,33 Blue Room,...,寒舍 HANSHE @Perling,心安素食斋料食馆 Xin An Vegetarian Cafe,怡保古早味猪肠粉 Ipoh Traditional Style Chee Cheong Fun,我家餐館Our Kitchen Nyonya Restaurant,海皇粿条仔Restaurant Hi Wan,田園粥火锅 Farmland Porridge Steamboat,相聚火锅 The Gather BBQ Steamboat(新犀鸟阁 New Hornbill BBQ Steamboat）,越南小廚 V NAM KITCHEN,青山角 / Green Hill Corner,食得福美食中心Cedar Point Food Centre
Author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
# cikgusally,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#Ativ Mindworks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#GJBlane RICE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#JL_King_Of_Music,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#MyNameIsMuna #MUNALICIOUS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
restaurant_author_matrix = author_restaurant_matrix.transpose()
restaurant_author_matrix.head(10)

Author,# cikgusally,#Ativ Mindworks,#GJBlane RICE,#JL_King_Of_Music,#MyNameIsMuna #MUNALICIOUS,#Ramen #Mee,#SamuelLabo,#SuPeR# (SuPerPauL),#ryuki2517,#바나나스프,...,홍성훈,홍우진,화성인,황명설,황무환,효니의 관심사Honey’s interest,흰호랑이,ﹰﹰChoon Woo,Ｎａｋａ Ｍｕｒａ,ｊ ｍ
Restaurant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'D' Selera Kelate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16th St. Cafe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1919 Restaurant Ipoh,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20 Chulia Lane Cafe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21 Bistro,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
218 Hainan Lor Mee,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
27@cove,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28 Food Centre,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3 :15 Auntie Hong's Cooking,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
33 Blue Room,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### a. Recommend Based On Correlations between Am Author and All Other Authors

In [32]:
restaurant_ratings = restaurant_author_matrix['Ooi Chuan Chong']
restaurant_ratings.head(10)

Restaurant
'D' Selera Kelate              0.0
16th St. Cafe                  0.0
1919 Restaurant Ipoh           0.0
20 Chulia Lane Cafe            0.0
21 Bistro                      0.0
218 Hainan Lor Mee             0.0
27@cove                        0.0
28 Food Centre                 0.0
3 :15 Auntie Hong's Cooking    0.0
33 Blue Room                   0.0
Name: Ooi Chuan Chong, dtype: float64

In [33]:
restaurant_ratings = restaurant_author_matrix['Jay Chin']
restaurant_ratings.head(10)

Restaurant
'D' Selera Kelate              0.0
16th St. Cafe                  0.0
1919 Restaurant Ipoh           0.0
20 Chulia Lane Cafe            0.0
21 Bistro                      0.0
218 Hainan Lor Mee             0.0
27@cove                        0.0
28 Food Centre                 0.0
3 :15 Auntie Hong's Cooking    0.0
33 Blue Room                   0.0
Name: Jay Chin, dtype: float64

In [34]:
# This is to find correlation between author based on the ratings of restaurants
similar_authors = restaurant_author_matrix.corrwith(restaurant_ratings)

# Create a dataframe with similar restaurants as the index column and correlation as another column
similar_authors = pd.DataFrame(similar_authors, columns = ['Correlation'])
similar_authors.head(10)

Unnamed: 0_level_0,Correlation
Author,Unnamed: 1_level_1
# cikgusally,-0.000757
#Ativ Mindworks,-0.000757
#GJBlane RICE,-0.000757
#JL_King_Of_Music,-0.000757
#MyNameIsMuna #MUNALICIOUS,-0.002142
#Ramen #Mee,-0.000757
#SamuelLabo,-0.000757
#SuPeR# (SuPerPauL),-0.000757
#ryuki2517,0.435751
#바나나스프,-0.001312


### Recommending Restaurants Based on The Most Similar Author

In [35]:
# Recommending Restaurants Based on The Most Similar Authors
most_similar_authors = similar_authors.sort_values('Correlation', ascending = False).iloc[1:11]
most_similar_authors

Unnamed: 0_level_0,Correlation
Author,Unnamed: 1_level_1
ces balucay,1.0
michael fong,1.0
KM GAN,1.0
stephen lai,1.0
Candice Jayce,1.0
thomas yoong,1.0
jg puch,1.0
Hardy Deiparine,1.0
Kok Siong Yeo,1.0
Andyy Chong,1.0


In [73]:
# Extract Author of the most similar author
authors = most_similar_authors.index.values.tolist()

recommendation = df_data2[df_data2['Author'] == authors[0]]

print("🟧 " + authors[0])
recommendation

🟧 ces balucay


Unnamed: 0,Author,Rating,Review,Restaurant,Location
212601,ces balucay,3.0,Service and food is good but price is on a ver...,Laguna Filipino Bar & Restaurant,Petaling Jaya


#### b. Recommend Based On Cosine Similarity

In [38]:
# Drop authors who vote less than 50 times.
new_data = df_data2[df_data2['Author'].map(df_data2['Author'].value_counts()) > 50] 
author_restaurant_pivot = new_data.pivot_table(index = 'Author', columns = ['Restaurant'], values = 'Rating').fillna(0)
author_restaurant_pivot.head()

Restaurant,218 Hainan Lor Mee,33 Blue Room,4Fingers Crispy Chicken @ Sunway Pyramid,ATAS,Aliyaa,Annie Kolo Mee,Antipodean @ Atria,Antipodean @ Mid Valley,Antipodean Cafe,Apollo Seafood Centre,...,"YEN, W Kuala Lumpur",Yue @ Sheraton PJ,Yun House,ZENZERO Restaurant & Wine Bar,"Zipangu, Shangri-La Hotel, Kuala Lumpur",leaf & co. cafe,லூசி லிங்கம் வாழை இலை உணவகம்,人間茶坊之老店 Life Cafe @ Padungan,越南小廚 V NAM KITCHEN,青山角 / Green Hill Corner
Author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Adrian Yong,0.0,0.0,0.0,0.0,5.0,0.0,4.0,0.0,4.0,0.0,...,0.0,3.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
Arthur Lee,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,5.0,4.0,0.0,4.0
Khairul Anuar Shaharudin,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,4.0,0.0,5.0,0.0,3.0,0.0,0.0,0.0,0.0
Lisa Chan,5.0,0.0,0.0,1.0,0.0,0.0,0.0,5.0,4.0,0.0,...,0.0,0.0,5.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0
William Tan,0.0,5.0,0.0,4.0,0.0,0.0,0.0,4.0,0.0,0.0,...,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,4.0,0.0


In [45]:
# Get the current author's favorite restaurants based on highest ratings
def users_choice(id, noOfRestaurants):
    users_fav = new_data[new_data["Author"] == id].sort_values(["Rating"], ascending = False)[0:int(noOfRestaurants)]
    return users_fav

In [46]:
# Perform consine similarity matrix
from sklearn.metrics.pairwise import cosine_similarity

def user_based(new_data, id, noOfRestaurants):
    user_recommend = []
    
    if id not in new_data["Author"].values:
        print("❌  No Such Authors Found: " + id)       
    else:
        index = np.where(author_restaurant_pivot.index == id)[0][0]
        similarity = cosine_similarity(author_restaurant_pivot)
        similar_users = list(enumerate(similarity[index]))
        similar_users = sorted(similar_users, key = lambda x:x[1],reverse = True)[0:int(noOfRestaurants)]
    
        for i in similar_users:
            data = df_data2[df_data2["Author"] == author_restaurant_pivot.index[i[0]]]
            user_recommend.extend(list(data.drop_duplicates("Author")["Author"].values))
        
    return user_recommend

In [47]:
def similar_restaurant(new_data, user, author_name, noOfRestaurants):
    x = new_data[new_data["Author"] == author_name]
    recommend_df = []
    
    user = list(user)
    
    for i in user:
        y = new_data[(new_data["Author"] == i)]
        restaurants = y.loc[~y["Restaurant"].isin(x["Restaurant"]), :]
        restaurants = restaurants.sort_values(["Rating"], ascending = False)[0:5]
        recommend_df.extend(restaurants["Restaurant"].values)
        
    return recommend_df[0:int(noOfRestaurants)]

In [66]:
def recommend_user_based(author_name, noOfRestaurants): 
    user_choice_df = pd.DataFrame(users_choice(author_name, noOfRestaurants))
    
    print("🟧 Author: {} ".format(author_name))
    
    user_based_recommendation = user_based(new_data, author_name, noOfRestaurants)
    restaurants_for_user = similar_restaurant(new_data, user_based_recommendation, author_name, noOfRestaurants)
    restaurant_for_user_df = pd.DataFrame(restaurants_for_user, columns = ["Restaurant"])
    
    return restaurant_for_user_df

### User Interaction Sessions

Sample Author Data:
- Adrian Yong
- William Tan

Sample No Found Data:
- Any of the value
- GGGGGGGGGG

In [69]:
print("Welcome Sir/Madam!\nThis session is about WHAT Recommendation On Most Similar Authors Would Also Liked")
print("----------------------------------------------------------------------------------")
print("NOTE: Author must be in full name")
author_name = input("Please provide an author name: ")

print("\nNOTE: Please provide in NUMBER")
noOfRestaurants = input("Please enter the number of restaurants for us to recommend for you: ")

Welcome Sir/Madam!
This session is about WHAT Recommendation On Most Similar Authors Would Also Liked
----------------------------------------------------------------------------------
NOTE: Author must be in full name
Please provide an author name: William Tan

NOTE: Please provide in NUMBER
Please enter the number of restaurants for us to recommend for you: 8


In [70]:
recommend_user_based(author_name, noOfRestaurants)

🟧 Author: William Tan 


Unnamed: 0,Restaurant
0,Ayam Garam Aun Kheng Lim (Main Branch)
1,Sushi Hibiki
2,Marta's Kitchen @ Hartamas
3,Chuup
4,Restoran Sweet Inn
5,Vin's Restaurant
6,Aliyaa
7,K Fry Urban Korean Holiday Villa
