In [1]:
pip install pandas scikit-surprise scikit-learn


Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split, cross_validate

# Load the interactions dataset (user-recipe interactions)
df_interactions = pd.read_csv('interactions_train.csv')

# Display the first few rows
print(df_interactions.head())


   user_id  recipe_id        date  rating      u       i
0     2046       4684  2000-02-25     5.0  22095   44367
1     2046        517  2000-02-25     5.0  22095   87844
2     1773       7435  2000-03-13     5.0  24732  138181
3     1773        278  2000-03-13     4.0  24732   93054
4     2046       3431  2000-04-07     5.0  22095  101723


In [5]:
# Assuming ratings are on a scale of 1 to 5
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df_interactions[['user_id', 'recipe_id', 'rating']], reader)


In [6]:
trainset, testset = train_test_split(data, test_size=0.2)


In [7]:
# Use SVD algorithm
model = SVD()

# Train the model on the trainset
model.fit(trainset)


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fbf38a1abb0>

In [8]:
# Test the model on the testset
predictions = model.test(testset)

# Calculate RMSE
rmse = accuracy.rmse(predictions)
print(f'RMSE: {rmse}')


RMSE: 0.9209
RMSE: 0.9208569766558821


In [9]:
cross_validate(model, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9255  0.9229  0.9201  0.9192  0.9151  0.9206  0.0035  
MAE (testset)     0.5460  0.5437  0.5431  0.5432  0.5422  0.5436  0.0013  
Fit time          6.39    5.55    6.54    5.99    6.14    6.12    0.34    
Test time         1.06    0.76    0.93    0.82    0.83    0.88    0.11    


{'test_rmse': array([0.92549468, 0.92285478, 0.92013633, 0.91922466, 0.91512935]),
 'test_mae': array([0.54598202, 0.5437092 , 0.54306234, 0.54315257, 0.54218514]),
 'fit_time': (6.388487100601196,
  5.551939964294434,
  6.537539958953857,
  5.987312078475952,
  6.13904881477356),
 'test_time': (1.0622057914733887,
  0.7603790760040283,
  0.9307613372802734,
  0.8203959465026855,
  0.8341138362884521)}

In [11]:
user_id = 'example_user_id'
recipe_id = 'example_recipe_id'

# Predict rating
prediction = model.predict(user_id, recipe_id)
print(f"Predicted rating: {prediction.est}")


Predicted rating: 4.573446534828776
