# _Restaurant Recommendation System_
## Prepare import

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import warnings
from prettytable import PrettyTable
from tabulate import tabulate
warnings.filterwarnings("ignore")

## Load Data
### GoogleReview
#### View Sample, Check Null Data For Review

In [None]:
# Load the cleaned Google review export from the working directory.
ratings_data = pd.read_csv('GoogleReview_data_cleaned.csv')
# Keep only the columns used by the rest of the notebook.
ratings_data = ratings_data[['Author', 'Rating', 'Review', 'Restaurant', 'Location']]
print("Review data:", ratings_data.shape)
print("\nCheck NULL values in Reviews:\n-----------------------------")
# Per-column count of missing values.
print(ratings_data.isnull().sum())
print("-----------------------------")
# Last expression of the cell: the notebook renders the first rows.
ratings_data.head()

In [None]:
# Work on a copy so the loaded frame `ratings_data` stays untouched.
df_data2 = ratings_data.copy()
df_data2.head()

# Data Visualization

### Top 15 Number Of Ratings On Restaurants


In [None]:
# Horizontal bar chart of the 15 restaurants with the most reviews.
plt.figure(figsize = (10, 8))
ax = sns.countplot(y = "Restaurant", data = df_data2, order = df_data2['Restaurant'].value_counts().index[0:15])

# Title and axis labels are applied after plotting so they are the final
# values on the axes; the previous "X label"/"Y label" were placeholders.
ax.set_title("TOP 15 Number of Ratings On Restaurants", fontsize = 24)
ax.set_xlabel("Number of Ratings", fontsize = 24)
ax.set_ylabel("Restaurant", fontsize = 24)

# Annotate every bar with its integer count, vertically centred on the bar.
for p in ax.patches:
    bar_length = p.get_width()
    bar_centre = p.get_y() + p.get_height() / 2
    ax.annotate(str(int(bar_length)), (bar_length + 10, bar_centre), va = 'center', fontsize = 16)

# Collaborative Filtering Recommendation System

## 1.0 Item-based Collaborative Filtering

In [None]:
# Building User-Item Interactions Matrix
# Rows are authors, columns are restaurants, cells hold the rating.
# pivot_table aggregates repeated (author, restaurant) pairs with its default
# aggregation (mean); pairs with no review are filled with 0.
author_restaurant_matrix = df_data2.pivot_table(index = 'Author', columns = ['Restaurant'], values = 'Rating').fillna(0)
author_restaurant_matrix.head(15)

#### Define function for identifying Author Who Rated a Given Restaurant

In [None]:
def get_Author(restaurantName):
    """Return the authors who rated ``restaurantName`` and the rating they gave.

    Reads the notebook-level ``author_restaurant_matrix`` (authors x
    restaurants, 0 meaning "not rated") and keeps only the rows whose value in
    the restaurant's column is greater than zero.

    Returns a one-column DataFrame named ``Rating`` indexed by author.
    Raises ``KeyError`` when the restaurant is not a column of the matrix.
    """
    ratings_column = author_restaurant_matrix[restaurantName]
    has_rated = ratings_column > 0.0
    return ratings_column[has_rated].to_frame(name='Rating')

### User Interaction Sessions 

Sample Data:
- 我家餐館Our Kitchen Nyonya Restaurant
- 1919 Restaurant Ipoh
- 心安素食斋料食馆 Xin An Vegetarian Cafe

In [None]:
# Interactive prompt: read the restaurant whose raters should be listed.
print("Welcome Sir/Madam!\nThis session is about Knowing Who Rated A Given Restaurant")
print("----------------------------------------------------------")
print("NOTE: Restaurant NAME must be in full form")

# The name is compared verbatim against the 'Restaurant' column later on.
restaurantName = input("Please provide a restaurant name: ")

In [None]:
# Report whether the entered name matches any review row exactly.
if not (df_data2["Restaurant"] == restaurantName).any():
    print("No Restaurants Found...")
else:
    banner = "================================================================="
    print("Restaurant Name: " + restaurantName)
    print(banner)
    print("Here's the OTHER Author(s) Who Rated This Restaurant:-")
    print(banner)

In [None]:
# Display author listing.
# Guarded: get_Author indexes the matrix by column name, so calling it with a
# name that is not a column would raise KeyError instead of a readable message.
if restaurantName in author_restaurant_matrix.columns:
    authors = get_Author(restaurantName)
    print(tabulate(authors.head(10), headers = ['Author', 'Rating'], tablefmt = 'fancy_outline', floatfmt = '.1f'))
else:
    print("No Restaurants Found...")

## 1.1 K-NN Item-based

In [None]:
# Item-by-user matrix: one row per restaurant, one column per author.
# The row index (restaurant names) is what recommend() later uses to turn
# neighbour positions back into names. Missing ratings are filled with 0.
restaurant_author_matrix = df_data2.pivot_table(index = 'Restaurant', columns = ['Author'], values = 'Rating').fillna(0)
restaurant_author_matrix

In [None]:
# Convert the restaurant x author matrix to Compressed Sparse Row (CSR) form.
# CSR keeps only the non-zero entries of each row together with their column
# indices, which suits this matrix because unrated cells were filled with 0.
from scipy.sparse import csr_matrix

# Row order matches restaurant_author_matrix.index.
csr_matrix_rating = csr_matrix(restaurant_author_matrix)
csr_matrix_rating

**KNN (K-Nearest Neighbors)** is a machine learning algorithm that can be used for classification or regression tasks. It finds the k-nearest neighbors of a data point based on a distance metric, and then uses those neighbors to make a prediction.

When working with large datasets, it is often the case that the majority of the data is sparse, meaning that most of the values are zero. In this case, it can be more efficient to store the data in a **sparse matrix** format such as csr_matrix.

**Using csr_matrix with KNN** can help to reduce the memory usage and computational time required for finding the nearest neighbors. This is because the csr_matrix format only stores the non-zero elements and their locations, which can greatly reduce the size of the data in memory.

In [None]:
# Define K-Nearest Neighbour (KNN)
# NearestNeighbors is an unsupervised neighbour search: it is fitted on the
# restaurant rows and later queried for the rows closest to a given restaurant.
from sklearn.neighbors import NearestNeighbors

# Cosine distance between rating vectors, computed by exhaustive (brute-force)
# comparison against every fitted row.
KNN_Model = NearestNeighbors(metric = 'cosine', algorithm = 'brute')
KNN_Model.fit(csr_matrix_rating)

In [None]:
def recommend(restaurant_name, k_value):
    """Print the ``k_value`` restaurants closest to ``restaurant_name``.

    Uses the notebook-level ``KNN_Model`` (fitted on the rows of
    ``restaurant_author_matrix``) and prints a table of rank, restaurant name
    and cosine distance. Nothing is returned.

    Parameters:
        restaurant_name: exact restaurant name as it appears in the index of
            ``restaurant_author_matrix``.
        k_value: number of recommendations wanted (anything ``int()`` accepts).

    If the restaurant is unknown a "not found" message is printed and the
    function returns without querying the model.
    """
    requested = int(k_value)

    # An unknown name would otherwise reach kneighbors() as an empty query
    # and fail with an unrelated shape error.
    if restaurant_name not in restaurant_author_matrix.index:
        print("No Restaurants Found...")
        return

    # Ask for one extra neighbour because the query restaurant is itself part
    # of the fitted data, but never for more rows than the model was given.
    fitted_rows = len(restaurant_author_matrix.index)
    query_vector = restaurant_author_matrix.loc[[restaurant_name]].values
    distances, indices = KNN_Model.kneighbors(query_vector, n_neighbors = min(requested + 1, fitted_rows))

    print("K-Nearest Neighbouring (KNN) Restaurant Recommendations :")
    print("=======================================================================================================")
    print("Recommendations Based On Restaurant Name: " + restaurant_name)
    print("=======================================================================================================")

    x = PrettyTable()
    x.field_names = ['No.', 'Restaurant Name', 'Distance']
    rank = 0
    for distance_points, position in zip(distances.flatten(), indices.flatten()):
        name = restaurant_author_matrix.index[position]
        # Skip the query by name: with tied distances it is not guaranteed
        # to be the first neighbour returned.
        if name == restaurant_name:
            continue
        if rank >= requested:
            break
        rank += 1
        x.add_row([rank, name, distance_points])
    print(x)

    noOfRestaurants = requested
    print("=======================================================================================================")
    print("                               You've Suggested To View " + str(noOfRestaurants) + " of Restaurants")
    print("                =======================================================================")
    print("                =======================================================================")

### User Interactions Section: result for K-NN Item based


Sample data:
- AQUA Restaurant & Bar
- Ah Hua Kuey Teow
- Ah Chui Seafood
- Anderson Curry House
- Amy Heritage Nyonya Cuisine

In [1]:
# Interactive prompts for the KNN recommender: restaurant name and K.
print("Welcome Sir/Madam!\nThis session is about Restaurant Recommendation Based on K-Nearest Neighbouring (KNN)")
print("-------------------------------------------------------------------------------------")
print("NOTE: Restaurant NAME must be in full form")
restaurant_name = input("Please enter the restaurant name: ")

print("\nNOTE: Please provide in NUMBER")
print("Please enter the number of restaurants (K-Value) for us to recommend to you: ")
# -1 is a sentinel that forces at least one pass through the loop.
k_value = -1
while k_value < 1:
    try:
        k_value = int(input())
    except ValueError:
        # Non-numeric input: k_value keeps its previous value, so ask again.
        print("Please enter an integer")
        continue
    if k_value <= 0:
        print("Please enter positive integer only")

Welcome Sir/Madam!
This session is about Restaurant Recommendation Based on K-Nearest Neighbouring (KNN)
-------------------------------------------------------------------------------------
NOTE: Restaurant NAME must be in full form
Please enter the restaurant name: Ah Hua Kuey Teow

NOTE: Please provide in NUMBER
Please enter the number of restaurants (K-Value) for us to recommend to you: 
b
Please enter an integer
0
Please enter positive integer only
-1
Please enter positive integer only
10


In [2]:
# Requires the cell defining recommend() (and the KNN model cells) to have been run first.
recommend(restaurant_name, k_value)

NameError: name 'recommend' is not defined

## 1.2 Pearson Correlation Item based

#### Correlation Between A Selected Restaurant and All Other Restaurants

Sample Data:
- 我家餐館Our Kitchen Nyonya Restaurant
- 1919 Restaurant Ipoh
- 心安素食斋料食馆 Xin An Vegetarian Cafe

In [None]:
# Interactive prompt: restaurant to use as the reference for correlation.
print("Welcome Sir/Madam!\nThis session is about Most Similar Restaurant Recommendation")
print("------------------------------------------------------------")
print("NOTE: Restaurant NAME must be in full form")

# Used verbatim as a column label of author_restaurant_matrix below.
restaurant_name = input("Please provide a restaurant name: ")

In [None]:
def get_Author(restaurantName):
    """Return every author's rating for ``restaurantName``.

    The result is the restaurant's whole column of the notebook-level
    ``author_restaurant_matrix`` (a Series indexed by author), including the
    0 entries of authors who did not rate it. Raises ``KeyError`` when the
    restaurant is not a column of the matrix.
    """
    return author_restaurant_matrix[restaurantName]

In [None]:
# Uses the get_Author defined in the cell directly above, which returns the
# restaurant's full rating column (unrated authors included as 0).
author_ratings = get_Author(restaurant_name)
# Preview the first ten authors and their rating for this restaurant.
print(tabulate(author_ratings.head(10).to_frame(), headers = ['Author', 'Rating'], tablefmt = 'fancy_outline', floatfmt = '.1f'))

In [None]:
# Correlate every restaurant's rating column with the selected restaurant's.
similar_restaurants = author_restaurant_matrix.corrwith(author_ratings)

# Create a dataframe with similar restaurants as the index column and name another column as correlation
similar_restaurants = pd.DataFrame(similar_restaurants, columns = ['Correlation'])
print("==========================================================================================")
print("💠Correlation-Based Recommendation Based on Restaurant: " + restaurant_name)
print("==========================================================================================")

# The value column holds correlations, so the header says so (it used to read 'Rating').
print(tabulate(similar_restaurants.head(10), headers = ['Restaurants', 'Correlation'], tablefmt = 'fancy_outline'))

### Recommending The Most Similar Restaurants

In [None]:
print("==========================================================================================")
print("💠Correlation-Based Recommendation Based on Restaurant: " + restaurant_name)
print("==========================================================================================")

# Highest correlation first.
sorted_restaurants = similar_restaurants.sort_values(by = 'Correlation', ascending = False)
# Remove the queried restaurant by name rather than by skipping row 0: another
# restaurant with an identical rating column also correlates at 1 and could sort first.
top_matches = sorted_restaurants.drop(index = restaurant_name, errors = 'ignore').head(10)
print(tabulate(top_matches, headers = ['Restaurants', 'Correlation'], tablefmt = 'fancy_outline'))

The correlation between the selected restaurant and itself is always 1, so that row is excluded from the list; every other restaurant is ranked by how strongly its ratings correlate with the selected restaurant's.

### Recommending The Most Similar and Popular Restaurants
To determine the correlation strength of a restaurant's recommendation, it is necessary to consider the number of ratings, as a higher number of ratings is indicative of greater popularity.

In [None]:
# Number of reviews per restaurant, kept as a one-column frame named 'Rating'.
df_rating = df_data2.groupby('Restaurant')['Rating'].count().to_frame()
separator = "=========================================================================================="
print(separator)
print("💠Correlation-Based Recommendation Based on Restaurant: " + restaurant_name)
print(separator)
print(tabulate(df_rating.head(10), headers = ['Restaurants', 'Rating Count'], tablefmt = 'fancy_outline'))

In [3]:
# Ask how many recommendations to show; -1 forces the first pass of the loop.
noOfRestaurants = -1
print("Enter the number of recommendations you want: ")

while noOfRestaurants < 1:
    try:
        noOfRestaurants = int(input())
    except ValueError:
        print("Please enter an integer")
        continue
    if noOfRestaurants <= 0:
        print("Please enter positive integer only")

# Attach each restaurant's review count to its correlation, best correlation first.
similar_popular_restaurants = similar_restaurants.join(df_rating['Rating'], on = 'Restaurant', how = 'left').sort_values(by = 'Correlation', ascending = False)
print("==========================================================================================")
print("💠Correlation-Based Recommendation Based on Restaurant: " + restaurant_name)
print("💠Total NUmber of Restaurants Suggested to you: " + str(noOfRestaurants))
print("==========================================================================================")

# Exclude the queried restaurant by name instead of assuming it is the first
# row after sorting; ties at correlation 1 would otherwise hide a real match.
top_recommendations = similar_popular_restaurants.drop(index = restaurant_name, errors = 'ignore').head(noOfRestaurants)
print(tabulate(top_recommendations, headers = ['Restaurants', 'Correlation Value', 'Rating Count'], tablefmt = 'fancy_outline'))

Enter the number of recommendations you want: 
b
Please enter an integer
0
Please enter positive integer only
-1
Please enter positive integer only
5


NameError: name 'similar_restaurants' is not defined

## User-based Collaborative Filtering 


In [None]:
# Rebuild the author x restaurant rating matrix for the user-based section
# (same construction as in the item-based section; unrated cells become 0).
author_restaurant_matrix = df_data2.pivot_table(index = 'Author', columns = ['Restaurant'], values = 'Rating').fillna(0)
author_restaurant_matrix.head()

In [None]:
# Transpose so that each author becomes a column; corrwith() below compares columns.
restaurant_author_matrix = author_restaurant_matrix.transpose()
restaurant_author_matrix.head(10)

# Pearson Correlation User based

#### Recommend Based On Correlations between An Author and All Other Authors

In [None]:
# Example author: their rating for every restaurant (0 where not rated).
# Note that the next cell overwrites restaurant_ratings with another author.
restaurant_ratings = restaurant_author_matrix['Ooi Chuan Chong']
print(tabulate(restaurant_ratings.head(10).to_frame(), headers = ['Restaurants', 'Rating'], tablefmt = 'fancy_outline', floatfmt = '.1f'))

In [None]:
# Reference author for the correlation cells that follow; the author name is hard-coded.
restaurant_ratings = restaurant_author_matrix['Jay Chin']
print(tabulate(restaurant_ratings.head(10).to_frame(), headers = ['Restaurants', 'Rating'], tablefmt = 'fancy_outline', floatfmt = '.1f'))

In [None]:
# Correlate every author's column of restaurant ratings with the reference
# author's ratings, and keep the result as a one-column frame indexed by author.
similar_authors = restaurant_author_matrix.corrwith(restaurant_ratings).to_frame(name = 'Correlation')

preview = similar_authors.head(10)
print(tabulate(preview, headers = ['Author', 'Correlation'], tablefmt = 'fancy_outline', floatfmt = '.6f'))

### Recommending Restaurants Based on The Most Similar Author

In [None]:
# Ten authors whose ratings correlate most with the reference author's.
# The reference author (the name of the restaurant_ratings Series) is removed
# by label; skipping row 0 after sorting is wrong whenever another author ties.
most_similar_authors = (
    similar_authors
    .drop(index = restaurant_ratings.name, errors = 'ignore')
    .sort_values('Correlation', ascending = False)
    .head(10)
)
# Six decimals, as in the preview table: one decimal makes close correlations look identical.
print(tabulate(most_similar_authors, headers = ['Author', 'Correlation'], tablefmt = 'fancy_outline', floatfmt = '.6f'))

In [None]:
# Names of the most similar authors, best match first.
authors = most_similar_authors.index.values.tolist()

# All reviews written by the single most similar author.
recommendation = df_data2[df_data2['Author'] == authors[0]]

print("🟧 " + authors[0])
# Last expression of the cell: the notebook renders the review rows.
recommendation

#### b. Recommend Based On Cosine Similarity

In [None]:
# Keep only reviews by authors with MORE than 50 reviews (authors with 50 or
# fewer are dropped): each row's author is mapped to that author's review count.
new_data = df_data2[df_data2['Author'].map(df_data2['Author'].value_counts()) > 50] 
# Author x restaurant rating matrix for the remaining authors; unrated cells become 0.
author_restaurant_pivot = new_data.pivot_table(index = 'Author', columns = ['Restaurant'], values = 'Rating').fillna(0)
author_restaurant_pivot.head()

In [None]:
# Get the current author's favorite restaurants based on highest ratings
def users_choice(id, noOfRestaurants):
    """Return the given author's highest-rated reviews.

    Filters the notebook-level ``new_data`` to the rows written by author
    ``id``, orders them by ``Rating`` from high to low and returns the first
    ``int(noOfRestaurants)`` rows as a DataFrame.
    """
    authored_rows = new_data[new_data["Author"] == id]
    best_first = authored_rows.sort_values(["Rating"], ascending = False)
    return best_first.head(int(noOfRestaurants))

In [None]:
# Perform consine similarity matrix
from sklearn.metrics.pairwise import cosine_similarity

def user_based(new_data, id, noOfRestaurants):
    """Return the names of the authors most similar to author ``id``.

    Similarity is the cosine similarity between rows of the notebook-level
    ``author_restaurant_pivot`` (authors x restaurants).

    Parameters:
        new_data: review DataFrame with an ``Author`` column; used to check
            that the author exists.
        id: author name to find neighbours for.
        noOfRestaurants: maximum number of similar authors to return.

    Returns:
        A list of author names, most similar first. The author ``id`` is never
        part of the list. The list is empty (and a message is printed) when
        the author is unknown.
    """
    # The author must be present in the pivot as well, otherwise there is no
    # rating vector to compare.
    if id not in new_data["Author"].values or id not in author_restaurant_pivot.index:
        print("❌  No Such Authors Found: " + id)
        return []

    # Only the target author's row of the similarity matrix is needed, so
    # compare that single row against everyone instead of all pairs.
    target_vector = author_restaurant_pivot.loc[[id]]
    scores = cosine_similarity(target_vector, author_restaurant_pivot)[0]

    # Exclude the author themself: they always score 1.0 and would otherwise
    # take up one of the requested neighbour slots without adding anything.
    candidates = [
        (name, score)
        for name, score in zip(author_restaurant_pivot.index, scores)
        if name != id
    ]
    candidates.sort(key = lambda pair: pair[1], reverse = True)

    return [name for name, _ in candidates[0:noOfRestaurants]]

In [None]:
def similar_restaurant(new_data, user, author_name, noOfRestaurants):
    """Collect restaurants liked by similar authors that ``author_name`` has not reviewed.

    Parameters:
        new_data: review DataFrame with ``Author``, ``Restaurant`` and
            ``Rating`` columns.
        user: iterable of similar authors' names, most similar first.
        author_name: the author receiving the recommendations.
        noOfRestaurants: maximum number of restaurant names to return.

    Returns:
        A list of distinct restaurant names. For each similar author, in the
        given order, up to five of their highest-rated restaurants that
        ``author_name`` has not reviewed are considered; a restaurant already
        collected from an earlier author is not repeated.
    """
    own_reviews = new_data[new_data["Author"] == author_name]
    already_reviewed = set(own_reviews["Restaurant"])

    recommend_df = []
    collected = set()

    for similar_author in list(user):
        their_reviews = new_data[new_data["Author"] == similar_author]
        unseen = their_reviews.loc[~their_reviews["Restaurant"].isin(already_reviewed), :]
        # Stable sort so equally rated restaurants keep their original order.
        top_picks = unseen.sort_values(["Rating"], ascending = False, kind = "mergesort")[0:5]
        for restaurant in top_picks["Restaurant"].values:
            # Several similar authors often rate the same place; list it once.
            if restaurant not in collected:
                collected.add(restaurant)
                recommend_df.append(restaurant)

    return recommend_df[0:noOfRestaurants]

In [None]:
def recommend_user_based(author_name, noOfRestaurants):
    """Recommend restaurants to ``author_name`` from what similar authors rated highly.

    Prints the author's name, finds similar authors with ``user_based`` and
    gathers their restaurants with ``similar_restaurant``, both run against
    the notebook-level ``new_data``.

    Returns a DataFrame with a single ``Restaurant`` column holding at most
    ``noOfRestaurants`` names; it is empty when no recommendation was found.
    """
    print("🟧 Author: {} ".format(author_name))

    # The author's own favourites were previously computed here and never
    # used; that dead computation has been removed.
    nearest_authors = user_based(new_data, author_name, noOfRestaurants)
    suggested = similar_restaurant(new_data, nearest_authors, author_name, noOfRestaurants)

    return pd.DataFrame(suggested, columns = ["Restaurant"])

### User Interaction Sessions

Sample Author Data:
- Adrian Yong
- William Tan

Sample Not-Found Data:
- Any value that is not an author name in the dataset
- GGGGGGGGGG

In [None]:
# Interactive prompts for the user-based recommender: author name and count.
print("Welcome Sir/Madam!\nThis session is about WHAT Recommendation On Most Similar Authors Would Also Liked")
print("----------------------------------------------------------------------------------")
print("NOTE: Author must be in full name")
author_name = input("Please provide an author name: ")

print("\nNOTE: Please provide in NUMBER")
# -1 is a sentinel that forces at least one pass through the loop.
noOfRestaurants = -1
print("Please enter the number of restaurants for us to recommend for you: ")

while noOfRestaurants < 1:
    try:
        noOfRestaurants = int(input())
    except ValueError:
        # Non-numeric input: the previous value is kept, so the loop asks again.
        print("Please enter an integer")
        continue
    if noOfRestaurants <= 0:
        print("Please enter positive integer only")

Welcome Sir/Madam!
This session is about WHAT Recommendation On Most Similar Authors Would Also Liked
----------------------------------------------------------------------------------
NOTE: Author must be in full name
Please provide an author name: b

NOTE: Please provide in NUMBER
Please enter the number of restaurants for us to recommend for you: 
0
Please enter positive integer only
-1
Please enter positive integer only
b
Please enter an integer
aasdsadsa
Please enter an integer
sbababa
Please enter an integer


In [None]:
# Last expression of the cell: the notebook renders the returned DataFrame of restaurants.
recommend_user_based(author_name, noOfRestaurants)