In [65]:
import pandas as pd
import numpy as np

from surprise import accuracy, Dataset, SVD, Reader
from surprise.model_selection import train_test_split, cross_validate

import string

food_data = pd.read_csv('data/food.csv')
ratings_data = pd.read_csv('data/ratings.csv')

def text_cleaning(text):
    text = "".join([char for char in text if char not in string.punctuation])
    return text
food_data['Describe'] = food_data['Describe'].apply(text_cleaning)

def create_soup(x):
  return " ".join([x['Describe'], x['C_Type'], x['Veg_Non']])

food_data['soup'] = food_data.apply(create_soup, axis=1)

reader = Reader(rating_scale=(1, 10))

merged_data = pd.merge(ratings_data, food_data, on='Food_ID')
data = Dataset.load_from_df(merged_data[['User_ID', 'Food_ID', 'Rating']], reader)

# sample random trainset and testset
# test set is made of 25% of the ratings.
trainset, testset = train_test_split(data, test_size=0.25, random_state=42)

# We'll use the famous SVD algorithm.
algo = SVD()

# Train the algorithm on the trainset, and predict ratings for the testset
algo.fit(trainset)
predictions = algo.test(testset)

# Then compute RMSE
accuracy.rmse(predictions)

RMSE: 2.9929


2.992928549574807

In [66]:
predictions

[Prediction(uid=32.0, iid=143.0, r_ui=1.0, est=4.716952356565238, details={'was_impossible': False}),
 Prediction(uid=93.0, iid=130.0, r_ui=5.0, est=6.1030656576187265, details={'was_impossible': False}),
 Prediction(uid=63.0, iid=269.0, r_ui=3.0, est=6.2548584376927465, details={'was_impossible': False}),
 Prediction(uid=13.0, iid=74.0, r_ui=8.0, est=5.8067270341187065, details={'was_impossible': False}),
 Prediction(uid=6.0, iid=22.0, r_ui=5.0, est=4.99900156777005, details={'was_impossible': False}),
 Prediction(uid=94.0, iid=48.0, r_ui=3.0, est=5.881105307237263, details={'was_impossible': False}),
 Prediction(uid=11.0, iid=200.0, r_ui=10.0, est=5.285896538105063, details={'was_impossible': False}),
 Prediction(uid=86.0, iid=159.0, r_ui=2.0, est=5.129920156844303, details={'was_impossible': False}),
 Prediction(uid=100.0, iid=24.0, r_ui=10.0, est=5.244006468230988, details={'was_impossible': False}),
 Prediction(uid=41.0, iid=4.0, r_ui=6.0, est=5.428942711791238, details={'was_impo

Demonstration about how to predict the rating for the user with id=80 and item with id=2

In [67]:
algo.predict(80.0, 2.0)

Prediction(uid=80.0, iid=2.0, r_ui=None, est=5.626110141991366, details={'was_impossible': False})

In [68]:
user_id = 80.0

def get_user_recommendations(user_id, n=5):
    predictions = []
    for item in merged_data['Food_ID'].unique():
        predictions.append(algo.predict(user_id, item))
    
    best_items = sorted([(prediction.iid, prediction.est) for prediction in predictions], key=lambda x: x[1])[::-1][:n]
    best_items_idx = [x for x, y in best_items]
    rec_items = food_data.loc[best_items_idx]
    return rec_items['Name']

print(get_user_recommendations(user_id, n=5))

49                      french pork chop
109         chicken and mushroom lasagna
127                      thai lamb balls
266            red velvet banana pudding
50     christmas chocolate fudge cookies
Name: Name, dtype: object


## Fine-tuning

In [69]:
from surprise.model_selection import GridSearchCV

param_grid = {
  'n_factors': [20, 50, 100, 200, 400, 800, 1000],
  'n_epochs': [5, 10, 20, 50, 100, 200]
}

gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=10)
gs.fit(data)

print(gs.best_score['rmse'])
print(gs.best_params['rmse'])

2.8609603176015157
{'n_factors': 800, 'n_epochs': 5}


In [70]:
best_factor = gs.best_params['rmse']['n_factors']
best_epoch = gs.best_params['rmse']['n_epochs']

# We'll use the famous SVD algorithm.
svd = SVD(n_factors=best_factor, n_epochs=best_epoch)

# Train the algorithm on the trainset
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f4798bde700>

In [71]:
cv = cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    2.9847  2.7165  2.6458  2.9690  3.0546  2.8741  0.1617  
MAE (testset)     2.5687  2.3108  2.2452  2.5770  2.6821  2.4768  0.1684  
Fit time          0.01    0.01    0.01    0.01    0.01    0.01    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    
