In [2]:
import numpy as np
import pandas as pd
from surprise import SVD, Dataset, Reader
from surprise.model_selection import cross_validate, train_test_split
from surprise import accuracy

from models import get_top_n
from loaders import load_ratings, load_items

import optuna


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
df_ratings = load_ratings()
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(df_ratings[['userId', 'movieId', 'rating']], reader)

# optuna => optimisation du RMSE

In [5]:
def objective(trial):
    # Suggest hyperparameters
    n_factors = trial.suggest_categorical("n_factors", [20, 50, 100])
    lr_all = trial.suggest_float("lr_all", 0.001, 0.02, log=True)
    reg_all = trial.suggest_float("reg_all", 0.01, 0.1)

    algo = SVD(n_factors=n_factors, lr_all=lr_all, reg_all=reg_all)

    # Use train/test split for speed
    trainset, testset = train_test_split(data, test_size=0.2)
    algo.fit(trainset)
    predictions = algo.test(testset)

    return accuracy.rmse(predictions, verbose=False)


In [7]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=30, timeout=300)  # timeout en secondes

[I 2025-06-04 16:41:42,278] A new study created in memory with name: no-name-84a1abe7-a197-430b-a332-cb3676fc5c97
[I 2025-06-04 16:41:50,889] Trial 0 finished with value: 0.8183088269710818 and parameters: {'n_factors': 50, 'lr_all': 0.00607913035103462, 'reg_all': 0.08304128893122487}. Best is trial 0 with value: 0.8183088269710818.
[I 2025-06-04 16:41:59,401] Trial 1 finished with value: 0.7985145282044015 and parameters: {'n_factors': 50, 'lr_all': 0.00728437562643609, 'reg_all': 0.04725251563714284}. Best is trial 1 with value: 0.7985145282044015.
[I 2025-06-04 16:42:07,448] Trial 2 finished with value: 0.8220956280131422 and parameters: {'n_factors': 20, 'lr_all': 0.004621617820108475, 'reg_all': 0.04007343067179382}. Best is trial 1 with value: 0.7985145282044015.
[I 2025-06-04 16:42:15,229] Trial 3 finished with value: 0.7974534173490841 and parameters: {'n_factors': 20, 'lr_all': 0.019267494023032433, 'reg_all': 0.0859941713543075}. Best is trial 3 with value: 0.797453417349084

In [None]:
print("Best RMSE:", study.best_value)
print("Best params:", study.best_params)


Parameters: {'n_factors': 100, 'lr_all': 0.01998786951309632, 'reg_all': 0.059635281166102956}. Best is trial 23 with value: 0.7903290786683458.

In [None]:
# best_params = study.best_params
# algo = SVD(**best_params)

# trainset = data.build_full_trainset()
# algo.fit(trainset)


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1405b265480>

In [5]:
algo = SVD(n_factors=100, lr_all=0.0199878, reg_all=0.05963528)
trainset = data.build_full_trainset()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x231ebf6bd30>

=> RMSE avec l'antit_Testset

In [6]:
anti_testset = trainset.build_anti_testset()
predictions = algo.test(anti_testset)


In [7]:
print(accuracy.rmse(predictions, verbose=False))

0.5988211669186972
