In [1]:
import numpy as np
import pandas as pd
from cmfrec import CMF, ContentBased
from sklearn.model_selection import train_test_split
from collections import defaultdict
from utility_cmfrec import run_cmfrec

Load data

In [2]:
# Read the three preprocessed CSV tables in one pass: game (item) side
# information, user side information, and the user-game ratings.
# The relative paths assume the notebook runs from its own directory.
games_features, users_features, ratings = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../preprocessed_data/games_features.csv',
        '../preprocessed_data/users_features.csv',
        '../preprocessed_data/ratings.csv',
    )
)


Split data

In [3]:
# Hold out 20% of the ratings for testing; the fixed seed keeps the split reproducible.
rec_train, rec_test = train_test_split(ratings, test_size=0.2, random_state=42)

# Users and games that appear at least once in the training split.
users_train = rec_train["user_id"].unique()
games_train = rec_train["app_id"].unique()

# Keep only test rows whose user AND game were both seen during training,
# so the evaluation never involves a user or game absent from the training data.
test_user_is_known = rec_test["user_id"].isin(users_train)
test_game_is_known = rec_test["app_id"].isin(games_train)
rec_test = rec_test.loc[test_user_is_known & test_game_is_known]

# Model

Collective Matrix Factorization

In [18]:
# Hyperparameters for the collective matrix factorization model
# (see the cmfrec `CMF` documentation for the exact meaning of each option).
cmf_params = dict(
    k=50,           # latent dimensionality
    method="als",   # optimisation method
    lambda_=20,     # regularisation strength
    w_main=0.6,     # weight of the ratings matrix
    w_user=0.4,     # weight of the user side information
    w_item=0.4,     # weight of the item side information
)
cmf = CMF(**cmf_params)

# Fit on the training ratings with user/game side information and report
# train and test metrics; the helper's log suggests it returns two result
# dictionaries (train, test) - confirm against utility_cmfrec.
# NOTE(review): the train report printed by run_cmfrec labels its second
# metric "MAE" while the test report labels it "MAPE". The printed train value
# is far larger than the train RMSE, which is impossible for an MAE
# (MAE <= RMSE), so the train label is presumably a typo for MAPE - verify in
# utility_cmfrec.
train, test = run_cmfrec(
    cmf,
    rec_train,
    rec_test,
    U=users_features,
    I=games_features,
)

Training the model...
Done. Time taken: 0:00:14.843728 

Evaluating the model with train data...
Time taken: 0:00:12.289018
---------------
Train Data
---------------
RMSE: 0.8355242259476812

MAE: 28.57128497418425


Adding train results to the dictionary...

Evaluating the model with test data...
Time taken: 0:00:04.750186
---------------
Test Data
---------------
RMSE: 0.9444242721263454

MAPE: 32.22118964378746

Precision@10: 0.8145785006775114

Recall@10: 0.6818698298489629

F1@10: 0.7143439362928201

NDCG@5: 0.8353702063758119
Storing the test results in the test dictionary...

---------------------------------------------
Total time taken to run this algorithm: 0:00:31.928638


Content-based model (alternative formulation)

In [19]:
# Hyperparameters for the content-based model, with user and item bias terms
# enabled (see the cmfrec `ContentBased` documentation for details).
offset_params = dict(
    k=50,
    lambda_=35,
    user_bias=True,
    item_bias=True,
)
offset = ContentBased(**offset_params)

# Slow cell: the recorded run took roughly 12.5 minutes to train.
# NOTE(review): this rebinds `train` and `test`, replacing the results that the
# CMF cell stored under the same names - copy them first if both are needed.
train, test = run_cmfrec(
    offset,
    rec_train,
    rec_test,
    U=users_features,
    I=games_features,
)

Training the model...
Done. Time taken: 0:12:29.173888 

Evaluating the model with train data...
Time taken: 0:00:10.275269
---------------
Train Data
---------------
RMSE: 0.9247938682612628

MAE: 31.20217276726808


Adding train results to the dictionary...

Evaluating the model with test data...
Time taken: 0:00:04.515620
---------------
Test Data
---------------
RMSE: 0.9393888561565027

MAPE: 31.77115485058204

Precision@10: 0.8149806994948843

Recall@10: 0.6748167451728876

F1@10: 0.7114410528484865

NDCG@5: 0.83373034323944
Storing the test results in the test dictionary...

---------------------------------------------
Total time taken to run this algorithm: 0:12:44.006204
