# Benchmark with Movielens dataset
The main purpose of this notebook is not to produce comprehensive benchmarking results on multiple datasets. Rather, it is intended to evaluate the different recommender algorithms (SVD, LightGCN, Transformer, and our algorithm) in this repository.

* Datasets
  * [Movielens 100K](https://grouplens.org/datasets/movielens/100k/).
  * [Movielens 1M](https://grouplens.org/datasets/movielens/1m/).

* Data split
  * TODO
  

* Evaluation metrics
  * Ranking metrics:
    * Precision@k.
    * Recall@k.
    * Normalized discounted cumulative gain@k (NDCG@k).
    * Mean average precision (MAP).
  * Rating metrics:
    * Root mean squared error (RMSE).
    * Mean absolute error (MAE).
    * R squared.
    * Explained variance.

In [3]:
!pip install torch



In [4]:
import warnings
warnings.filterwarnings("ignore")
import logging
logging.basicConfig(level=logging.ERROR) 

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from utils.dataloader import load_data_df, load_item_df, load_user_features, maybe_download
from models.svd_model import SVDModel
from utils.benchmark import calculate_rating_metrics, calculate_ranking_metrics

In [5]:
def generate_summary(data, algo, k, rating_metrics, ranking_metrics):
    """Build one flat result row for the benchmark table.

    Args:
        data: Value stored under the "Data" key.
        algo: Value stored under the "Algo" key.
        k: Value stored under the "K" key.
        rating_metrics: Mapping of rating metric names to values, or None.
            When None, "RMSE", "MAE", "R2" and "Explained Variance" are
            filled with NaN.
        ranking_metrics: Mapping of ranking metric names to values, or None.
            When None, "MAP", "nDCG@k", "Precision@k" and "Recall@k" are
            filled with NaN.

    Returns:
        A dict holding the three identifying keys followed by the rating
        metrics and then the ranking metrics.
    """
    # NaN placeholders keep the row shape stable when an algorithm does not
    # produce one family of metrics.
    rating_names = ("RMSE", "MAE", "R2", "Explained Variance")
    ranking_names = ("MAP", "nDCG@k", "Precision@k", "Recall@k")

    rating_part = dict.fromkeys(rating_names, np.nan) if rating_metrics is None else rating_metrics
    ranking_part = dict.fromkeys(ranking_names, np.nan) if ranking_metrics is None else ranking_metrics

    row = {"Data": data, "Algo": algo, "K": k}
    for part in (rating_part, ranking_part):
        row.update(part)
    return row

In [7]:
# Benchmark the SVD baseline on a single MovieLens size and record one result row.
cols = ["Data", "Algo", "K", "RMSE", "MAE", "R2", "Explained Variance", "MAP", "nDCG@k", "Precision@k", "Recall@k"]
df_results = pd.DataFrame(columns=cols)
size = "100k"
# Single cutoff shared by the ranking metrics and the "K" column, so the
# recorded value cannot drift from the one that was actually evaluated.
top_k = 10
svd_model = SVDModel(size, n_factors=200, n_epochs=30)
data = svd_model.prepare_training_data()
svd_model.train()
predictions = svd_model.predict()

# Rating metrics compare the model's predictions against svd_model.test_pre.
ratings = calculate_rating_metrics(svd_model.test_pre, predictions)

# NOTE(review): recommend_k_svd() is called without arguments, so the number of
# recommendations per user is whatever its own default is - confirm it matches top_k.
top_k_scores = svd_model.recommend_k_svd()
rankings = calculate_ranking_metrics(svd_model.test_pre, top_k_scores, top_k)

# Label the row with the same `size` and `top_k` used above rather than repeating literals.
summary = generate_summary(size, "svd", top_k, ratings, rankings)
# Append as a new row; labels start at 1 because the current row count is incremented.
df_results.loc[df_results.shape[0] + 1] = summary



/Users/sun/Desktop/NUS/CS5248/project/movie_recommdender/dinghui101/data/ml-100k/u.data
/Users/sun/Desktop/NUS/CS5248/project/movie_recommdender/dinghui101/data/ml-100k/u.data


In [8]:
# Show the current results table; as the last expression of the cell it is rendered as the cell output.
df_results

Unnamed: 0,Data,Algo,K,RMSE,MAE,R2,Explained Variance,MAP,nDCG@k,Precision@k,Recall@k
1,100k,svd,10,0.950361,0.748291,0.285657,0.285712,0.015655,0.111098,0.100849,0.034672


In [8]:
# Benchmark configuration: a recommender is evaluated for every combination of
# data size and algorithm listed below.
cols = [
    "Data",
    "Algo",
    "K",
    "Train time (s)",
    "Predicting time (s)",
    "RMSE",
    "MAE",
    "R2",
    "Explained Variance",
    "Recommending time (s)",
    "MAP",
    "nDCG@k",
    "Precision@k",
    "Recall@k",
]
# Empty results table with one column per reported field.
df_results = pd.DataFrame(columns=cols)

data_sizes = ["100k", "1m"]
algorithms = ["svd", "lightgcn"]  # baseline algorithms

# Metric families listed per algorithm.
metrics = dict(
    svd=["rating", "ranking"],
    lightgcn=["ranking"],
)

def benchmark_recommenders(data_sizes, algorithms):
    """Load and split each dataset, then return the shared results table.

    For every entry of ``data_sizes`` the ratings are loaded with
    ``load_data_df``, their shape is printed, and they are split with
    ``train_test_split`` (``train_size=0.75``, ``random_state=42``,
    ``shuffle=True``); both splits are printed. Per-algorithm training and
    evaluation are not implemented yet (see the commented sketch below), so
    no rows are added to the results table.

    Args:
        data_sizes: Iterable of dataset size identifiers passed to
            ``load_data_df(size=...)``.
        algorithms: Algorithm names to benchmark. Currently unused because
            the per-algorithm loop is still commented out.

    Returns:
        The module-level ``df_results`` object. Returning it (instead of
        falling through and returning ``None``) keeps
        ``df_results = benchmark_recommenders(...)`` from replacing the
        results table with ``None``.
    """
    global df_results
    for data_size in data_sizes:
        df = load_data_df(size=data_size)
        print(f"Size of Movielens {data_size}: {df.shape}")

        # Split the data set; the fixed random_state makes the split repeatable.
        train_data, test_data = train_test_split(df, train_size=0.75, random_state=42, shuffle=True)
        print(train_data)
        print(test_data)

        # TODO: per-algorithm benchmarking sketch, not yet runnable.
        # NOTE(review): prepare_training_data, params, trainer, evaluate_model and
        # DEFAULT_K are not defined in the visible cells, the sketch uses
        # data_train/data_test rather than train_data/test_data, and it passes more
        # arguments to generate_summary than the five-parameter version accepts.
        #
        # for algo in algorithms:
        #     print(f"\nComputing {algo} algorithm on Movielens {data_size}")
        #
        #     # Prepare the training data
        #     train = prepare_training_data.get(algo, lambda x,y:(x,y))(data_train, data_test)
        #
        #     # Get the model parameters and train
        #     model_params = params[algo]
        #     model, time_train = trainer[algo](model_params, train)
        #     print(f"Training time: {time_train}s")
        #
        #     # Evaluate the model
        #     ratings, time_rating, rankings, time_ranking = evaluate_model(algo, model, data_test, train)
        #
        #     # Record the results
        #     summary = generate_summary(data_size, algo, DEFAULT_K, time_train, time_rating, ratings, time_ranking, rankings)
        #     df_results.loc[df_results.shape[0]] = summary

    return df_results


In [9]:
# Run the benchmark over the configured data sizes and algorithms.
# The return value of benchmark_recommenders is rebound to df_results and then printed.
df_results = benchmark_recommenders(data_sizes, algorithms)
print(df_results)

file is already exist
/Users/sun/Desktop/NUS/CS5248/project/movie_recommdender/dinghui101/data/ml-100k/u.data
Size of Movielens 100k: (100000, 4)
       UserId  MovieId  Rating  Timestamp
98980     811      901       4  886377771
69824     804      755       3  879445305
9928       52      287       5  882922357
75599     735      181       4  876698604
95621     897       96       5  879990430
...       ...      ...     ...        ...
6265      216      231       2  880245109
54886     343      276       5  876403078
76820     437      475       3  880140288
860       284      322       3  885329671
15795     222      200       3  878181647

[75000 rows x 4 columns]
       UserId  MovieId  Rating  Timestamp
75721     877      381       4  882677345
80184     815      602       3  878694269
19864      94      431       4  891721716
76699     416      875       2  876696938
92991     500      182       2  883873556
...       ...      ...     ...        ...
21271     399      684       3