In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from surprise import Dataset, Reader, SVD, SVDpp, NMF
from surprise.model_selection import cross_validate, GridSearchCV, train_test_split
import random

# ratings_url = "https://files.grouplens.org/datasets/movielens/ml-latest-small/ratings.csv"
# movies_url = "https://files.grouplens.org/datasets/movielens/ml-latest-small/movies.csv"

# ratings = pd.read_csv(ratings_url)
# movies = pd.read_csv(movies_url)

data = Dataset.load_builtin('ml-100k')
raw_ratings = data.raw_ratings

df = pd.DataFrame(raw_ratings, columns=['user', 'item', 'rating', 'timestamp'])
df.head()

# data = pd.merge(ratings, movies, on='movieId')
# print(data.head())

reader = Reader(rating_scale=(0.5, 5.0))
# dataset = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
# dataset = Dataset.load_from_df(data[['userId', 'movieId', 'rating']], reader)

param_grid = {
    'n_factors': [50, 100, 150],
    'n_epochs': [20, 30],
    'lr_all': [0.002, 0.005],
    'reg_all': [0.02, 0.1]
}

gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=3, n_jobs=-1)
gs.fit(data)

print("Best RMSE score:", gs.best_score['rmse'])
print("Best parameters:", gs.best_params['rmse'])

best_svd = gs.best_estimator['rmse']
cross_validate(best_svd, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

svdpp = SVDpp()
cross_validate(svdpp, raw_ratings, measures=['RMSE', 'MAE'], cv=3, verbose=True)

nmf = NMF()
cross_validate(nmf, raw_ratings, measures=['RMSE', 'MAE'], cv=3, verbose=True)

def initialize_matrix(n_users, n_items, n_factors):
    np.random.seed(42)
    P = np.random.rand(n_users, n_factors)
    Q = np.random.rand(n_items, n_factors)
    return P, Q

def loss_function(R, P, Q, lambda_reg):
    loss = 0
    for u, i, r_ui in R:
        prediction = np.dot(P[u, :], Q[i, :])
        loss += (r_ui - prediction) ** 2
    loss += lambda_reg * (np.sum(np.square(P)) + np.sum(np.square(Q)))
    return loss

def gradient_descent(R, P, Q, lambda_reg, lr, epochs):
    for epoch in range(epochs):
        for u, i, r_ui in R:
            error = r_ui - np.dot(P[u, :], Q[i, :])
            P[u, :] += lr * (error * Q[i, :] - lambda_reg * P[u, :])
            Q[i, :] += lr * (error * P[u, :] - lambda_reg * Q[i, :])

        loss = loss_function(R, P, Q, lambda_reg)
        print(f"Epoch {epoch + 1}, Loss: {loss}")
    return P, Q

n_users = data['userId'].nunique()
n_items = data['movieId'].nunique()
R = [(int(row['userId']) - 1, int(row['movieId']) - 1, row['rating']) for _, row in raw_ratings.iterrows()]

n_factors = 50
lambda_reg = 0.02
lr = 0.005
epochs = 20

P, Q = initialize_matrix(n_users, n_items, n_factors)
P, Q = gradient_descent(R, P, Q, lambda_reg, lr, epochs)

Dataset ml-100k could not be found. Do you want to download it? [Y/n] y
Trying to download dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to /root/.surprise_data/ml-100k
Best RMSE score: 0.9330934239416279
Best parameters: {'n_factors': 150, 'n_epochs': 30, 'lr_all': 0.005, 'reg_all': 0.1}
Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9353  0.9362  0.9291  0.9335  0.0032  
MAE (testset)     0.7413  0.7403  0.7371  0.7396  0.0018  
Fit time          2.51    2.85    2.21    2.53    0.26    
Test time         0.46    0.37    0.26    0.36    0.08    


AttributeError: 'list' object has no attribute 'raw_ratings'

In [2]:
!pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357285 sha256=86f99928260fe86fd73297ac5f8f36c93fc886955b3564593c71849d41aff2b3
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Install