In [1]:
# Importing Libraries
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
  
# Load the user ratings table; the preview printed below shows the columns
# userId, restaurantId and rating.
# NOTE(review): absolute, machine-specific path — the notebook only runs where
# this exact file exists; consider a path relative to a configurable data dir.
ratings = pd.read_csv("C:/Users/Garion/Desktop/user preference.csv")
print(ratings.head())

   userId  restaurantId  rating
0       1           111     3.0
1       1           107     4.5
2       1            65     4.6
3       1            35     3.6
4       1            86     4.0


In [3]:
# Load the restaurant metadata table; the preview printed below shows the
# columns restaurantId, restaurant (name), genre and price range.
# NOTE(review): absolute, machine-specific path — see the ratings load above
# in the original notebook for the same concern.
restaurant = pd.read_csv("C:/Users/Garion/Desktop/Data for ML.csv")
print(restaurant.head())

   restaurantId                              restaurant  \
0             1                              Café Natsu   
1             2    Mr. Holmes Bakehouse (Pacific Plaza)   
2             3  Craftsmen Coffee (Clarke Quay Central)   
3             4                            hay, gelato.   
4             5                               Refuel II   

                         genre price range  
0                japanese|cafe    moderate  
1         bakery|café|American    moderate  
2          coffee|café|western    moderate  
3                      dessert       cheap  
4  chinese|indian|western|cafe    moderate  


In [4]:
   
# Size of the ratings table and how densely users / restaurants are covered.
n_ratings = ratings.shape[0]
n_restaurant = len(pd.unique(ratings['restaurantId']))
n_users = len(pd.unique(ratings['userId']))

print(f"Number of ratings: {n_ratings}")
print(f"Number of unique restaurantId's: {n_restaurant}")
print(f"Number of unique users: {n_users}")
# Averages are evaluated only after the raw counts have been reported.
print(f"Average ratings per user: {round(n_ratings/n_users, 2)}")
print(f"Average ratings per restaurant: {round(n_ratings/n_restaurant, 2)}")

Number of ratings: 84
Number of unique restaurantId's: 69
Number of unique users: 15
Average ratings per user: 5.6
Average ratings per restaurant: 1.22


In [5]:
   
# Number of rated restaurants per user, as a two-column frame
# (userId, n_ratings).
user_freq = (
    ratings.groupby('userId')['restaurantId']
    .count()
    .reset_index(name='n_ratings')
)
print(user_freq.head())

   userId  n_ratings
0       1          5
1       2          6
2       3          6
3       4          5
4       5          5


In [6]:
# Find Lowest and Highest rated restaurants (by mean rating per restaurant).
mean_rating = ratings.groupby('restaurantId')[['rating']].mean()

# A bare expression is only displayed when it is the LAST statement of a cell,
# so every lookup is printed explicitly; otherwise this cell shows nothing.

# Lowest rated restaurant
lowest_rated = mean_rating['rating'].idxmin()
print(restaurant.loc[restaurant['restaurantId'] == lowest_rated])
# Highest rated restaurant
highest_rated = mean_rating['rating'].idxmax()
print(restaurant.loc[restaurant['restaurantId'] == highest_rated])
# Individual ratings behind the highest-rated restaurant
print(ratings[ratings['restaurantId'] == highest_rated])
# Individual ratings behind the lowest-rated restaurant
print(ratings[ratings['restaurantId'] == lowest_rated])

# The extremes above rest on very few ratings. Per-restaurant count and mean
# are the inputs a Bayesian average would need.
# NOTE(review): the Bayesian average itself is never computed in this notebook.
restaurant_stats = ratings.groupby('restaurantId')[['rating']].agg(['count', 'mean'])
restaurant_stats.columns = restaurant_stats.columns.droplevel()

In [7]:
   
# Now, we create user-item matrix using scipy csr matrix
from scipy.sparse import csr_matrix
 
def create_matrix(df):
    """Build a sparse restaurant-by-user rating matrix and its lookup tables.

    Args:
        df: DataFrame with the columns ``userId``, ``restaurantId`` and
            ``rating``.

    Returns:
        Tuple ``(X, user_mapper, restaurant_mapper, user_inv_mapper,
        restaurant_inv_mapper)`` where ``X`` is a ``csr_matrix`` of shape
        (n_restaurants, n_users), the ``*_mapper`` dicts map an ID to its
        row/column index (IDs in sorted order) and the ``*_inv_mapper`` dicts
        map an index back to the ID.
    """
    # Compute the sorted unique IDs once; they define both the matrix shape
    # and the index order, so the two can never disagree.
    user_ids = np.unique(df["userId"])
    restaurant_ids = np.unique(df["restaurantId"])
    N = len(user_ids)
    M = len(restaurant_ids)

    # Map Ids to indices
    user_mapper = dict(zip(user_ids, range(N)))
    restaurant_mapper = dict(zip(restaurant_ids, range(M)))

    # Map indices to IDs
    user_inv_mapper = dict(zip(range(N), user_ids))
    restaurant_inv_mapper = dict(zip(range(M), restaurant_ids))

    # csr_matrix SUMS entries that share a coordinate, so a user who rated the
    # same restaurant twice would get an out-of-scale value. Average repeated
    # ratings so every cell stays on the rating scale.
    pair_ratings = (
        df.groupby(["restaurantId", "userId"], sort=False)["rating"]
        .mean()
        .reset_index()
    )

    user_index = [user_mapper[i] for i in pair_ratings["userId"]]
    restaurant_index = [restaurant_mapper[i] for i in pair_ratings["restaurantId"]]

    X = csr_matrix(
        (pair_ratings["rating"].to_numpy(), (restaurant_index, user_index)),
        shape=(M, N),
    )

    return X, user_mapper, restaurant_mapper, user_inv_mapper, restaurant_inv_mapper
     
# Build the restaurant-by-user rating matrix and the ID <-> index lookup tables
# from the full ratings table. The functions defined in later cells read
# restaurant_mapper / restaurant_inv_mapper as module-level names.
X, user_mapper, restaurant_mapper, user_inv_mapper, restaurant_inv_mapper = create_matrix(ratings)
# Debug helpers — uncomment to inspect the matrix and the lookup tables:
#print(X)
#print("")
#print(user_mapper)
#print(user_inv_mapper)
#print(restaurant_mapper)
#print(restaurant_inv_mapper)

In [8]:
"""
Find similar restaurants using KNN
"""
from sklearn.neighbors import NearestNeighbors
def find_similar_restaurant(restaurant_id, X, k, metric='cosine', show_distance=False):
    """Return the k restaurants whose rating vectors are closest to a given one.

    Reads the module-level ``restaurant_mapper`` / ``restaurant_inv_mapper``
    lookup tables, so they must describe the same matrix as ``X``.

    Args:
        restaurant_id: ID of the query restaurant; must be a key of
            ``restaurant_mapper`` (a ``KeyError`` is raised otherwise).
        X: matrix with one row per restaurant.
        k: number of neighbours wanted.
        metric: distance metric handed to ``NearestNeighbors``.
        show_distance: when True, return ``(restaurant_id, distance)`` pairs
            instead of bare IDs.

    Returns:
        Up to ``k`` neighbours ordered from nearest to farthest, never
        containing ``restaurant_id`` itself.
    """
    restaurant_ind = restaurant_mapper[restaurant_id]
    restaurant_vec = X[restaurant_ind].reshape(1, -1)

    # The query row is part of X, so ask for one extra neighbour — but never
    # for more rows than X has, which kneighbors rejects.
    n_neighbors = min(k + 1, X.shape[0])
    kNN = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric)
    kNN.fit(X)
    distances, indices = kNN.kneighbors(restaurant_vec, return_distance=True)

    # Drop the query by index rather than by position: with tied distances
    # (e.g. restaurants rated by the same single user have cosine distance 0)
    # the query row is not guaranteed to come back first.
    neighbours = [
        (restaurant_inv_mapper[int(ind)], float(dist))
        for ind, dist in zip(indices[0], distances[0])
        if ind != restaurant_ind
    ][:k]

    if show_distance:
        return neighbours
    return [neighbour_id for neighbour_id, _ in neighbours]
 
 
# Restaurant ID -> display name lookup.
restaurant_titles = dict(zip(restaurant['restaurantId'], restaurant['restaurant']))

restaurant_id = 7

similar_ids = find_similar_restaurant(restaurant_id, X, k=10)
restaurant_title = restaurant_titles[restaurant_id]

# Same wording as the user-level recommendation message (these are
# restaurants, not movies).
print(f"Since you ate at {restaurant_title}, you might also like:")
for i in similar_ids:
    # A rated restaurant may be missing from the metadata table; do not crash.
    print(restaurant_titles.get(i, "restaurant not found"))

Since you watched Jimmy Monkey Cafe & Bar
Sushi Kimura
Itacho Sushi
Muk-Bang Korean Restaurant
Kei Kaisendon
Brawn & Brains
Seoul Garden
Baci Baci Restaurant
The Sushi Bar
The Assembly Ground, Cineleisure (Somerset)
Nickel


In [9]:
def recommend_restaurants_for_user(user_id, X, user_mapper, restaurant_mapper, restaurant_inv_mapper, k=10):
    """Print restaurants similar to the best-rated restaurant of some users.

    The ratings of all given users are pooled (read from the module-level
    ``ratings`` frame), the restaurant with the highest pooled rating is
    picked, and its ``k`` nearest neighbours are printed by name (names come
    from the module-level ``restaurant`` frame).

    Args:
        user_id: a single user ID or an iterable of user IDs.
        X: restaurant-by-user matrix passed on to ``find_similar_restaurant``.
        user_mapper, restaurant_mapper, restaurant_inv_mapper: accepted for
            interface compatibility; not used inside this function.
        k: number of recommendations.

    Returns:
        None. Results and error messages are printed.
    """
    # Accept a bare ID as well as any iterable of IDs.
    if isinstance(user_id, (str, bytes)) or not hasattr(user_id, '__iter__'):
        user_ids = [user_id]
    else:
        user_ids = list(user_id)
    if not user_ids:
        # Previously an empty collection fell through to an unbound `df1`.
        print(f"User with ID {user_id} does not exist.")
        return

    frames = []
    for uid in user_ids:
        user_ratings = ratings[ratings['userId'] == uid]
        if user_ratings.empty:
            print(f"one of the User with ID {uid} does not exist.")
            return
        frames.append(user_ratings)
    # Concatenate once, after the loop, instead of on every iteration.
    df1 = pd.concat(frames)

    print(df1)
    print("")
    # Series.max skips NaN; the first row holding the top rating wins ties.
    top_rating = df1['rating'].max()
    restaurant_id = df1.loc[df1['rating'] == top_rating, 'restaurantId'].iloc[0]

    restaurant_titles = dict(zip(restaurant['restaurantId'], restaurant['restaurant']))
    restaurant_title = restaurant_titles.get(restaurant_id, "restaurant not found")
    if restaurant_title == "restaurant not found":
        print(f"restaurant with ID {restaurant_id} not found.")
        return

    similar_ids = find_similar_restaurant(restaurant_id, X, k)

    print(f"Since you ate at {restaurant_title}, you might also like:")
    for i in similar_ids:
        print(restaurant_titles.get(i, "restaurant not found"))

In [10]:
user_id = {1,2}  # IDs of the users whose ratings are pooled for one recommendation
recommend_restaurants_for_user(user_id, X, user_mapper, restaurant_mapper, restaurant_inv_mapper, k=10)

    userId  restaurantId  rating
0        1           111     3.0
1        1           107     4.5
2        1            65     4.6
3        1            35     3.6
4        1            86     4.0
5        2           106     3.3
6        2            91     4.2
7        2             8     3.8
8        2            83     4.3
9        2            65     4.0
10       2           130     4.9

Since you ate at Nangfa Thai Kitchen, you might also like:
Harry's
OLLA Specialty Coffee (Clementi)
Tipo Pasta Bar
Sawadee Thai Cuisine
Yamagawa Japanese Restaurant, Beach Road
Kra pow Thai Restaurant
Coexist Coffee Co.
Tora Tora Tora Japanese Restaurant Singapore
Hansik Korean Restaurant
OTOKO Japanese Restaurant


In [11]:
user_id = {2300}  # an ID with no rows in `ratings`: exercises the "does not exist" message
recommend_restaurants_for_user(user_id, X, user_mapper, restaurant_mapper, restaurant_inv_mapper, k=10)

one of the User with ID 2300 does not exist.


In [None]:
# Importing Libraries
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
#loading rating dataset
# NOTE(review): absolute, machine-specific paths — consider a configurable data dir.
ratings = pd.read_csv("C:/Users/Garion/Desktop/user preference.csv")
print(ratings.head())
# loading restaurant dataset
restaurant = pd.read_csv("C:/Users/Garion/Desktop/Data for ML.csv")
print(restaurant.head())

# Size of the ratings table and how densely users / restaurants are covered.
n_ratings = len(ratings)
n_restaurant = len(ratings['restaurantId'].unique())
n_users = len(ratings['userId'].unique())

print(f"Number of ratings: {n_ratings}")
print(f"Number of unique restaurantId's: {n_restaurant}")
print(f"Number of unique users: {n_users}")
print(f"Average ratings per user: {round(n_ratings/n_users, 2)}")
print(f"Average ratings per restaurant: {round(n_ratings/n_restaurant, 2)}")

# Number of rated restaurants per user.
user_freq = ratings[['userId', 'restaurantId']].groupby(
    'userId').count().reset_index()
user_freq.columns = ['userId', 'n_ratings']
print(user_freq.head())

# Find Lowest and Highest rated restaurants (by mean rating per restaurant).
mean_rating = ratings.groupby('restaurantId')[['rating']].mean()

# A bare expression is only displayed when it is the LAST statement of a cell,
# so every lookup is printed explicitly; otherwise they show nothing.

# Lowest rated restaurant
lowest_rated = mean_rating['rating'].idxmin()
print(restaurant.loc[restaurant['restaurantId'] == lowest_rated])
# Highest rated restaurant
highest_rated = mean_rating['rating'].idxmax()
print(restaurant.loc[restaurant['restaurantId'] == highest_rated])
# Individual ratings behind the highest-rated restaurant
print(ratings[ratings['restaurantId'] == highest_rated])
# Individual ratings behind the lowest-rated restaurant
print(ratings[ratings['restaurantId'] == lowest_rated])

# The extremes above rest on very few ratings. Per-restaurant count and mean
# are the inputs a Bayesian average would need.
# NOTE(review): the Bayesian average itself is never computed in this notebook.
restaurant_stats = ratings.groupby('restaurantId')[['rating']].agg(['count', 'mean'])
restaurant_stats.columns = restaurant_stats.columns.droplevel()
# Now, we create user-item matrix using scipy csr matrix
from scipy.sparse import csr_matrix
 
def create_matrix(df):
    """Build a sparse restaurant-by-user rating matrix and its lookup tables.

    Args:
        df: DataFrame with the columns ``userId``, ``restaurantId`` and
            ``rating``.

    Returns:
        Tuple ``(X, user_mapper, restaurant_mapper, user_inv_mapper,
        restaurant_inv_mapper)`` where ``X`` is a ``csr_matrix`` of shape
        (n_restaurants, n_users), the ``*_mapper`` dicts map an ID to its
        row/column index (IDs in sorted order) and the ``*_inv_mapper`` dicts
        map an index back to the ID.
    """
    # Compute the sorted unique IDs once; they define both the matrix shape
    # and the index order, so the two can never disagree.
    user_ids = np.unique(df["userId"])
    restaurant_ids = np.unique(df["restaurantId"])
    N = len(user_ids)
    M = len(restaurant_ids)

    # Map Ids to indices
    user_mapper = dict(zip(user_ids, range(N)))
    restaurant_mapper = dict(zip(restaurant_ids, range(M)))

    # Map indices to IDs
    user_inv_mapper = dict(zip(range(N), user_ids))
    restaurant_inv_mapper = dict(zip(range(M), restaurant_ids))

    # csr_matrix SUMS entries that share a coordinate, so a user who rated the
    # same restaurant twice would get an out-of-scale value. Average repeated
    # ratings so every cell stays on the rating scale.
    pair_ratings = (
        df.groupby(["restaurantId", "userId"], sort=False)["rating"]
        .mean()
        .reset_index()
    )

    user_index = [user_mapper[i] for i in pair_ratings["userId"]]
    restaurant_index = [restaurant_mapper[i] for i in pair_ratings["restaurantId"]]

    X = csr_matrix(
        (pair_ratings["rating"].to_numpy(), (restaurant_index, user_index)),
        shape=(M, N),
    )

    return X, user_mapper, restaurant_mapper, user_inv_mapper, restaurant_inv_mapper
     
# Build the rating matrix and its lookup tables, then dump all of them
# (one object per line, with a blank line after the matrix).
X, user_mapper, restaurant_mapper, user_inv_mapper, restaurant_inv_mapper = create_matrix(ratings)
print(
    X,
    "",
    user_mapper,
    restaurant_mapper,
    user_inv_mapper,
    restaurant_inv_mapper,
    sep="\n",
)
"""
Find similar restaurants using KNN
"""
from sklearn.neighbors import NearestNeighbors
def find_similar_restaurant(restaurant_id, X, k, metric='cosine', show_distance=False):
    """Find the k restaurants closest to a given one and return the fitted model.

    Reads the module-level ``restaurant_mapper`` / ``restaurant_inv_mapper``
    lookup tables, so they must describe the same matrix as ``X``.

    Args:
        restaurant_id: ID of the query restaurant; must be a key of
            ``restaurant_mapper`` (a ``KeyError`` is raised otherwise).
        X: matrix with one row per restaurant.
        k: number of neighbours wanted.
        metric: distance metric handed to ``NearestNeighbors``.
        show_distance: when True, the neighbour list holds
            ``(restaurant_id, distance)`` pairs instead of bare IDs.

    Returns:
        ``(neighbour_ids, kNN)``: up to ``k`` neighbours ordered from nearest
        to farthest (never the query itself) and the fitted
        ``NearestNeighbors`` instance.
    """
    restaurant_ind = restaurant_mapper[restaurant_id]
    restaurant_vec = X[restaurant_ind].reshape(1, -1)

    # The query row is part of X, so ask for one extra neighbour — but never
    # for more rows than X has, which kneighbors rejects.
    n_neighbors = min(k + 1, X.shape[0])
    kNN = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric)
    kNN.fit(X)
    distances, indices = kNN.kneighbors(restaurant_vec, return_distance=True)

    # Drop the query by index rather than by position: with tied distances
    # (e.g. restaurants rated by the same single user have cosine distance 0)
    # the query row is not guaranteed to come back first.
    neighbours = [
        (restaurant_inv_mapper[int(ind)], float(dist))
        for ind, dist in zip(indices[0], distances[0])
        if ind != restaurant_ind
    ][:k]

    if show_distance:
        return neighbours, kNN
    return [neighbour_id for neighbour_id, _ in neighbours], kNN
 
 
# Restaurant ID -> display name lookup.
restaurant_titles = dict(zip(restaurant['restaurantId'], restaurant['restaurant']))

restaurant_id = 8

similar_ids, kNN_model = find_similar_restaurant(restaurant_id, X, k=10)
restaurant_title = restaurant_titles[restaurant_id]

# Same wording as the user-level recommendation message (these are
# restaurants, not movies).
print(f"Since you ate at {restaurant_title}, you might also like:")
for i in similar_ids:
    # A rated restaurant may be missing from the metadata table; do not crash.
    print(restaurant_titles.get(i, "restaurant not found"))

def recommend_restaurants_for_user(user_id, X, user_mapper, restaurant_mapper, restaurant_inv_mapper, k=10):
    """Recommend restaurants similar to the best-rated restaurant of some users.

    The ratings of all given users are pooled (read from the module-level
    ``ratings`` frame), the restaurant with the highest pooled rating is
    picked, and its ``k`` nearest neighbours are looked up.

    Args:
        user_id: a single user ID or an iterable of user IDs.
        X: restaurant-by-user matrix passed on to ``find_similar_restaurant``.
        user_mapper, restaurant_mapper, restaurant_inv_mapper: accepted for
            interface compatibility; not used inside this function.
        k: number of recommendations.

    Returns:
        ``(similar_ids, restaurant_titles, restaurant_title, Knn_model)`` on
        success, or ``None`` (after printing a message) when a user or the
        chosen restaurant's name cannot be found — callers must check for
        ``None`` before unpacking.
    """
    # Accept a bare ID as well as any iterable of IDs.
    if isinstance(user_id, (str, bytes)) or not hasattr(user_id, '__iter__'):
        user_ids = [user_id]
    else:
        user_ids = list(user_id)
    if not user_ids:
        # Previously an empty collection fell through to an unbound `df1`.
        print(f"User with ID {user_id} does not exist.")
        return

    frames = []
    for uid in user_ids:
        user_ratings = ratings[ratings['userId'] == uid]
        if user_ratings.empty:
            print(f"one of the User with ID {uid} does not exist.")
            return
        frames.append(user_ratings)
    # Concatenate once, after the loop, instead of on every iteration.
    df1 = pd.concat(frames)

    print("")
    print(df1)
    # Series.max skips NaN; the first row holding the top rating wins ties.
    top_rating = df1['rating'].max()
    restaurant_id = df1.loc[df1['rating'] == top_rating, 'restaurantId'].iloc[0]

    restaurant_titles = dict(zip(restaurant['restaurantId'], restaurant['restaurant']))
    restaurant_title = restaurant_titles.get(restaurant_id, "restaurant not found")
    if restaurant_title == "restaurant not found":
        print(f"restaurant with ID {restaurant_id} not found.")
        return

    similar_ids, Knn_model = find_similar_restaurant(restaurant_id, X, k)
    return similar_ids, restaurant_titles, restaurant_title, Knn_model
user_id = [1,2]  # IDs of the users whose ratings are pooled for one recommendation
# State left over from the single-restaurant demo, shown for comparison.
print(similar_ids)
print(restaurant_titles)

# recommend_restaurants_for_user returns None when a user (or the chosen
# restaurant's name) is missing; unpacking that directly raised TypeError.
recommendation = recommend_restaurants_for_user(user_id, X, user_mapper, restaurant_mapper, restaurant_inv_mapper, k=10)
if recommendation is not None:
    similar_ids, restaurant_titles, restaurant_title, Knn_model = recommendation

    print(f"Since you ate at {restaurant_title}, you might also like:")
    for i in similar_ids:
        print(restaurant_titles.get(i, "restaurant not found"))

#import joblib
#import os

# Check if the directory exists, if not, create it.
#if not os.path.exists('models'):
#    os.makedirs('models')

# Fit a model for restaurant 7 (saving it is currently disabled below).
similar_ids, kNN_model = find_similar_restaurant(7, X, k=10)
print(kNN_model)
#joblib.dump(kNN_model, 'models/knn_model.joblib')

In [None]:
user_id = {2300}  # presumably an ID absent from `ratings` — exercises the "does not exist" path; TODO confirm
# The return value is deliberately not unpacked here.
recommend_restaurants_for_user(user_id, X, user_mapper, restaurant_mapper, restaurant_inv_mapper, k=10)

In [None]:
# joblib is first imported in a LATER cell of this notebook, so import it here
# to keep this cell runnable on a fresh kernel.
import joblib

# NOTE(review): joblib.load unpickles arbitrary objects — only load files you
# created yourself. The file is written by the find_similar_restaurant variant
# that calls joblib.dump, so that cell has to run at least once beforehand.
knn_model = joblib.load('models/knn_model.joblib')
print(knn_model)

# Define a new data point: one rating per user column of the fitted matrix
# (15 users in the data shown above — TODO confirm it matches the saved model).
X_new = [[1, 2, 3, 4, 5,6,7,8,9,10,11,12,13,14,15]]

k = 5

M = len(ratings['restaurantId'].unique())

# Row index -> restaurant ID, rebuilt the same way create_matrix does.
restaurant_inv_mapper = dict(zip(list(range(M)), np.unique(ratings["restaurantId"])))

# Request exactly k neighbours instead of relying on the n_neighbors the saved
# model was fitted with and a separately hard-coded loop bound.
neighbour = knn_model.kneighbors(X_new, n_neighbors=k, return_distance=False)
neighbour_ids = [restaurant_inv_mapper[int(n)] for n in neighbour[0]]

restaurant_titles = dict(zip(restaurant['restaurantId'], restaurant['restaurant']))
print(restaurant_titles)
print("")
print(neighbour)

# Names of the k nearest restaurants, nearest first.
for i in neighbour_ids:
    print(restaurant_titles.get(i, "restaurant not found"))


In [None]:
# Importing Libraries
import numpy as np
import pandas as pd
import sklearn
import joblib
import os
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dot, Dense, Flatten, Concatenate
from tensorflow.keras.optimizers import Adam

#loading rating dataset
ratings = pd.read_csv("C:/Users/Garion/Desktop/user preference.csv")

# loading restaurant dataset
restaurant = pd.read_csv("C:/Users/Garion/Desktop/Data for ML.csv")

EMBEDDING_DIM = 50
EPOCHS = 50
tf.random.set_seed(42)  # reproducible weight initialisation

# Embedding layers index rows 0..input_dim-1, but the raw IDs are neither
# 0-based nor contiguous (e.g. restaurant IDs above 100 with fewer than 100
# distinct restaurants), so encode them as dense indices first.
user_ids = np.unique(ratings['userId'])
restaurant_ids = np.unique(ratings['restaurantId'])
user_index = ratings['userId'].map(
    {uid: idx for idx, uid in enumerate(user_ids)}).to_numpy().reshape(-1, 1)
restaurant_index = ratings['restaurantId'].map(
    {rid: idx for idx, rid in enumerate(restaurant_ids)}).to_numpy().reshape(-1, 1)
rating_values = ratings['rating'].to_numpy(dtype='float32').reshape(-1, 1)

# Symbolic inputs: one user index and one restaurant index per sample.
user_input = Input(shape=(1,), dtype='int32', name='user')
restaurant_input = Input(shape=(1,), dtype='int32', name='restaurant')

# Create and flatten the user and restaurant embeddings: (batch, 1, dim) -> (batch, dim)
user_embedding = Flatten()(
    Embedding(input_dim=len(user_ids), output_dim=EMBEDDING_DIM)(user_input))
restaurant_embedding = Flatten()(
    Embedding(input_dim=len(restaurant_ids), output_dim=EMBEDDING_DIM)(restaurant_input))

# Calculate the dot product between the user and restaurant embeddings
dot_product = Dot(axes=1, normalize=False)([user_embedding, restaurant_embedding])

# Dense layer rescales the dot product and adds the bias term
output = Dense(1, activation='linear')(dot_product)

# Create the model from the Input tensors (not from intermediate tensors)
model = Model(inputs=[user_input, restaurant_input], outputs=output)

# Compile the model (`lr` is the deprecated spelling of `learning_rate`)
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

# Train before saving; an unfitted model only holds random weights.
model.fit([user_index, restaurant_index], rating_values, epochs=EPOCHS, verbose=0)

# Save the model
os.makedirs('models', exist_ok=True)

model.save('models/restaurant_rating_model.h5')

In [None]:
# Importing Libraries
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import joblib
import os

warnings.simplefilter(action='ignore', category=FutureWarning)

# Read the ratings table and the restaurant metadata table.
ratings = pd.read_csv("C:/Users/Garion/Desktop/user preference.csv")
restaurant = pd.read_csv("C:/Users/Garion/Desktop/Data for ML.csv")

# Size of the ratings table and how densely users / restaurants are covered.
n_ratings = ratings.shape[0]
n_restaurant = len(pd.unique(ratings['restaurantId']))
n_users = len(pd.unique(ratings['userId']))

print(f"Number of ratings: {n_ratings}")
print(f"Number of unique restaurantId's: {n_restaurant}")
print(f"Number of unique users: {n_users}")
print(f"Average ratings per user: {round(n_ratings/n_users, 2)}")
print(f"Average ratings per restaurant: {round(n_ratings/n_restaurant, 2)}")

# Number of rated restaurants per user, as a (userId, n_ratings) frame.
user_freq = (
    ratings.groupby('userId')['restaurantId']
    .count()
    .reset_index(name='n_ratings')
)

# Mean rating per restaurant (single-column frame indexed by restaurantId).
mean_rating = ratings[['restaurantId', 'rating']].groupby('restaurantId').mean()

# Now, we create user-item matrix using scipy csr matrix
from scipy.sparse import csr_matrix

def create_matrix(df):
    """Build a sparse restaurant-by-user rating matrix and its lookup tables.

    Args:
        df: DataFrame with the columns ``userId``, ``restaurantId`` and
            ``rating``.

    Returns:
        Tuple ``(X, user_mapper, restaurant_mapper, user_inv_mapper,
        restaurant_inv_mapper)`` where ``X`` is a ``csr_matrix`` of shape
        (n_restaurants, n_users), the ``*_mapper`` dicts map an ID to its
        row/column index (IDs in sorted order) and the ``*_inv_mapper`` dicts
        map an index back to the ID.
    """
    # Compute the sorted unique IDs once; they define both the matrix shape
    # and the index order, so the two can never disagree.
    user_ids = np.unique(df["userId"])
    restaurant_ids = np.unique(df["restaurantId"])
    N = len(user_ids)
    M = len(restaurant_ids)

    # Map Ids to indices
    user_mapper = dict(zip(user_ids, range(N)))
    restaurant_mapper = dict(zip(restaurant_ids, range(M)))

    # Map indices to IDs
    user_inv_mapper = dict(zip(range(N), user_ids))
    restaurant_inv_mapper = dict(zip(range(M), restaurant_ids))

    # csr_matrix SUMS entries that share a coordinate, so a user who rated the
    # same restaurant twice would get an out-of-scale value. Average repeated
    # ratings so every cell stays on the rating scale.
    pair_ratings = (
        df.groupby(["restaurantId", "userId"], sort=False)["rating"]
        .mean()
        .reset_index()
    )

    user_index = [user_mapper[i] for i in pair_ratings["userId"]]
    restaurant_index = [restaurant_mapper[i] for i in pair_ratings["restaurantId"]]

    X = csr_matrix(
        (pair_ratings["rating"].to_numpy(), (restaurant_index, user_index)),
        shape=(M, N),
    )

    return X, user_mapper, restaurant_mapper, user_inv_mapper, restaurant_inv_mapper
     
# Build the restaurant-by-user rating matrix and the ID <-> index lookup
# tables; find_similar_restaurant below reads restaurant_mapper and
# restaurant_inv_mapper as module-level names.
X, user_mapper, restaurant_mapper, user_inv_mapper, restaurant_inv_mapper = create_matrix(ratings)

from sklearn.neighbors import KNeighborsClassifier

def find_similar_restaurant(restaurant_id, X, k, metric='cosine', show_distance=False):
    """Find the k restaurants closest to a given one and persist the fitted model.

    Reads the module-level ``restaurant_mapper`` / ``restaurant_inv_mapper``
    lookup tables, so they must describe the same matrix as ``X``.

    Args:
        restaurant_id: ID of the query restaurant; must be a key of
            ``restaurant_mapper`` (a ``KeyError`` is raised otherwise).
        X: matrix with one row per restaurant.
        k: number of neighbours wanted.
        metric: distance metric handed to ``KNeighborsClassifier``.
        show_distance: when True, the neighbour list holds
            ``(restaurant_id, distance)`` pairs instead of bare IDs.

    Returns:
        ``(similar_ids, knn)``: up to ``k`` neighbours ordered from nearest to
        farthest (never the query itself) and the fitted classifier.

    Side effects:
        Writes the fitted classifier to ``models/knn_model.joblib``.
    """
    restaurant_ind = restaurant_mapper[restaurant_id]
    restaurant_vec = X[restaurant_ind].reshape(1, -1)

    # The query row is part of X, so ask for one extra neighbour — but never
    # for more rows than X has, which kneighbors rejects.
    n_neighbors = min(k + 1, X.shape[0])
    knn = KNeighborsClassifier(n_neighbors=n_neighbors, metric=metric)

    # Every row is its own class, so the model acts as a plain neighbour index.
    knn.fit(X, np.arange(X.shape[0]))

    # With one class per row, predict_proba is the same 1/n_neighbors for every
    # neighbour, so sorting it returns an arbitrary subset that can include the
    # query itself. kneighbors gives the neighbours ordered by distance.
    distances, indices = knn.kneighbors(restaurant_vec, return_distance=True)

    # Drop the query by index rather than by position: with tied distances the
    # query row is not guaranteed to come back first.
    neighbours = [
        (restaurant_inv_mapper[int(ind)], float(dist))
        for ind, dist in zip(indices[0], distances[0])
        if ind != restaurant_ind
    ][:k]

    # joblib.dump does not create missing directories.
    os.makedirs('models', exist_ok=True)
    joblib.dump(knn, 'models/knn_model.joblib')

    if show_distance:
        return neighbours, knn
    return [neighbour_id for neighbour_id, _ in neighbours], knn

# Restaurant ID -> display name lookup.
restaurant_titles = dict(zip(restaurant['restaurantId'], restaurant['restaurant']))

restaurant_id = 7

similar_ids, knn = find_similar_restaurant(restaurant_id, X, k=10)

# Same wording as the user-level recommendation message (these are
# restaurants, not movies).
print(f"Since you ate at {restaurant_titles[restaurant_id]}, you might also like:")
for i in similar_ids:
    print(restaurant_titles.get(i, "restaurant not found"))



def recommend_restaurants_for_user(user_id, X, user_mapper, restaurant_mapper, restaurant_inv_mapper, k=10):
    """Recommend restaurants similar to the best-rated restaurant of some users.

    The ratings of all given users are pooled (read from the module-level
    ``ratings`` frame), the restaurant with the highest pooled rating is
    picked, and its ``k`` nearest neighbours are looked up.

    Args:
        user_id: a single user ID or an iterable of user IDs.
        X: restaurant-by-user matrix passed on to ``find_similar_restaurant``.
        user_mapper, restaurant_mapper, restaurant_inv_mapper: accepted for
            interface compatibility; not used inside this function.
        k: number of recommendations.

    Returns:
        ``(similar_ids, restaurant_titles, restaurant_title, KNN)`` on
        success, or ``None`` (after printing a message) when a user or the
        chosen restaurant's name cannot be found — callers must check for
        ``None`` before unpacking.
    """
    # Accept a bare ID as well as any iterable of IDs.
    if isinstance(user_id, (str, bytes)) or not hasattr(user_id, '__iter__'):
        user_ids = [user_id]
    else:
        user_ids = list(user_id)
    if not user_ids:
        # Previously an empty collection fell through to an unbound `df1`.
        print(f"User with ID {user_id} does not exist.")
        return

    frames = []
    for uid in user_ids:
        user_ratings = ratings[ratings['userId'] == uid]
        if user_ratings.empty:
            print(f"one of the User with ID {uid} does not exist.")
            return
        frames.append(user_ratings)
    # Concatenate once, after the loop, instead of on every iteration.
    df1 = pd.concat(frames)

    print("")
    print(df1)
    # Series.max skips NaN; the first row holding the top rating wins ties.
    top_rating = df1['rating'].max()
    restaurant_id = df1.loc[df1['rating'] == top_rating, 'restaurantId'].iloc[0]

    restaurant_titles = dict(zip(restaurant['restaurantId'], restaurant['restaurant']))
    restaurant_title = restaurant_titles.get(restaurant_id, "restaurant not found")
    if restaurant_title == "restaurant not found":
        print(f"restaurant with ID {restaurant_id} not found.")
        return

    similar_ids, KNN = find_similar_restaurant(restaurant_id, X, k)
    return similar_ids, restaurant_titles, restaurant_title, KNN

user_id = [1,2]  # IDs of the users whose ratings are pooled for one recommendation
# State left over from the single-restaurant demo, shown for comparison.
print(similar_ids)
print(restaurant_titles)

# recommend_restaurants_for_user returns None when a user (or the chosen
# restaurant's name) is missing; unpacking that directly raised TypeError.
recommendation = recommend_restaurants_for_user(user_id, X, user_mapper, restaurant_mapper, restaurant_inv_mapper, k=10)
if recommendation is not None:
    similar_ids, restaurant_titles, restaurant_title, KNN = recommendation

    print(f"Since you ate at {restaurant_title}, you might also like:")
    for i in similar_ids:
        print(restaurant_titles.get(i, "restaurant not found"))

    print("")
    print(KNN)


In [None]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType