In [1]:
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

#cimport numpy as np # noqa
import numpy as np

from surprise import Reader, AlgoBase, PredictionImpossible
from surprise import Dataset, SVD, SVDpp, NMF
from surprise.utils import get_rng

from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise.model_selection import KFold
from surprise.model_selection import GridSearchCV

from surprise import accuracy

import pandas as pd
import os
import time
import math

from sklearn.metrics import confusion_matrix, mean_squared_error, mean_absolute_error, precision_score, recall_score
from math import sqrt

from surprise import CoSVDv9, CoSVDv5

from surprise.model_selection.validation import fit_and_score, print_summary
from joblib import Parallel
from joblib import delayed

import matrices_generation as mg

In [2]:
# Dataset selection: keep exactly one `data_source` assignment active.
#data_source = 'ml-latest-small' # 100k MovieLens dataset 2016
#data_source = 'mlsmall' # 100k MovieLens dataset 2018
data_source = 'ml-10M100K' # 10M MovieLens dataset

# NOTE(review): Reader() is built with its default settings. MovieLens
# ratings presumably use half-star steps starting at 0.5 -- confirm that the
# default rating scale of Reader is what these experiments intend.
reader = Reader()

# Data directory is resolved relative to the notebook's working directory.
path = os.path.join('../','Data',data_source)
rate = pd.read_csv(os.path.join(path, 'ratings.csv'))
raw_tags = pd.read_csv(os.path.join(path, 'tags.csv'), encoding='utf-8')

# Disabled tag-frequency filter. It refers to `tag_threshold`, which is not
# defined anywhere in the visible notebook, so it cannot be re-enabled as-is.
#gb_tags = raw_tags.groupby(['tag'], as_index=False)['userId'].count()
#filtered_tags = raw_tags[raw_tags.tag.isin(list(gb_tags.tag[gb_tags.userId >= tag_threshold]))].reset_index(drop=True)

# `data.raw_ratings` follows the row order of `rate`; cold_start_input()
# depends on that alignment when it maps row labels of `rate` to ratings.
data = Dataset.load_from_df(rate[['userId', 'movieId', 'rating']], reader)

# NOTE(review): `cv` is not referenced by any cell visible in this notebook.
cv = KFold(n_splits=10, random_state=123)

In [3]:
def cold_start_input(data, rate, n_rating=10, n_exp=10, random_state=123):
    """Yield ``n_exp`` cold-start (trainset, testset) pairs.

    For each repetition, ``n_rating`` ratings are sampled for every user
    (rows of ``rate`` grouped on ``userId``). The sampled ratings form the
    training set and all remaining ratings form the test set.

    Parameters
    ----------
    data : object
        Must expose ``raw_ratings`` (a sequence) plus ``construct_trainset``
        and ``construct_testset``.
    rate : pandas.DataFrame
        Needs ``userId`` and ``movieId`` columns. Its index labels are used
        directly as positions into ``data.raw_ratings``; this assumes
        ``rate`` is the frame ``data`` was built from and still carries its
        default 0..n-1 index (not verified here).
    n_rating : int
        Number of ratings kept per user for training.
    n_exp : int
        Number of repetitions (splits) to generate.
    random_state : int
        Seed from which the per-repetition sampling seeds are derived.

    Yields
    ------
    tuple
        ``(trainset, testset)`` as returned by ``data.construct_trainset``
        and ``data.construct_testset``.

    Raises
    ------
    ValueError
        If at least one user has fewer than ``n_rating`` ratings.
    """
    ratings_per_user = rate.groupby('userId')['movieId'].size()
    if (ratings_per_user < n_rating).any():
        raise ValueError(
            'cannot sample n_rating={} ratings per user: the smallest user '
            'group has only {} rating(s)'.format(n_rating,
                                                 int(ratings_per_user.min())))

    # A private RandomState draws the same seeds as
    # np.random.seed(random_state) followed by np.random.randint(...), but
    # leaves the process-wide NumPy RNG untouched.
    seed_rng = np.random.RandomState(random_state)

    # Positions of all ratings; identical for every repetition.
    all_positions = np.arange(len(data.raw_ratings))

    for seed in seed_rng.randint(0, 1000, n_exp):
        sampled = rate.groupby('userId')['movieId'].apply(
            lambda s: s.sample(n_rating, random_state=seed))
        # The innermost index level holds the original row labels of `rate`,
        # whatever name pandas gives that level.
        train_positions = np.asarray(sampled.index.get_level_values(-1))
        test_positions = np.delete(all_positions, train_positions)

        raw_trainset = [data.raw_ratings[pos] for pos in train_positions]
        raw_testset = [data.raw_ratings[pos] for pos in test_positions]

        trainset = data.construct_trainset(raw_trainset)
        testset = data.construct_testset(raw_testset)

        yield trainset, testset
        
def parellel_cold_start(algo, data, raw_ratings, measures=['rmse', 'mae'],
                        random_state=123, n_rating=10, n_exp=10, n_jobs=1,
                        pre_dispatch='2*n_jobs', return_train_measures=False,
                        verbose=True):
    """Evaluate ``algo`` on repeated cold-start splits, optionally in parallel.

    Splits come from ``cold_start_input(data, raw_ratings, n_rating, n_exp,
    random_state)``; each one is passed to ``fit_and_score`` through joblib.

    Parameters
    ----------
    algo : the algorithm instance handed to ``fit_and_score``.
    data, raw_ratings : forwarded unchanged to ``cold_start_input``.
    measures : list of str
        Metric names; they are lower-cased before use. The default list is
        never mutated.
    random_state, n_rating, n_exp : forwarded to ``cold_start_input``.
    n_jobs, pre_dispatch : forwarded to ``joblib.Parallel``.
    return_train_measures : bool
        Forwarded to ``fit_and_score``; when true, ``train_<measure>``
        entries are added to the result.
    verbose : bool
        When true, ``print_summary`` prints the per-split table.

    Returns
    -------
    dict
        ``'test_<measure>'`` (and, if requested, ``'train_<measure>'``)
        mapped to NumPy arrays with one value per split, plus ``'fit_time'``
        and ``'test_time'`` as tuples with one entry per split.
    """
    measures = [m.lower() for m in measures]

    # Lazy generator: joblib pulls splits as workers become free, so not all
    # train/test sets have to be materialised up front.
    delayed_list = (delayed(fit_and_score)(algo, trainset, testset, measures,
                                           return_train_measures)
                    for (trainset, testset) in cold_start_input(
                        data, raw_ratings, n_rating, n_exp, random_state))

    out = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch)(delayed_list)

    # Each job yields a 4-tuple; transpose the list of tuples into 4 columns.
    (test_measures_dicts,
     train_measures_dicts,
     fit_times,
     test_times) = zip(*out)

    test_measures = dict()
    train_measures = dict()
    ret = dict()
    for m in measures:
        test_measures[m] = np.asarray([d[m] for d in test_measures_dicts])
        ret['test_' + m] = test_measures[m]
        if return_train_measures:
            train_measures[m] = np.asarray([d[m] for d in
                                            train_measures_dicts])
            ret['train_' + m] = train_measures[m]

    ret['fit_time'] = fit_times
    ret['test_time'] = test_times

    if verbose:
        print_summary(algo, measures, test_measures, train_measures, fit_times,
                      test_times, n_exp)

    # Previously the collected results were dropped and None was returned.
    return ret

In [4]:
# Metrics requested for every experiment: two error metrics followed by
# precision / recall / NDCG at each ranking cut-off (5, 10, 15).
m_list = ['RMSE', 'MAE'] + [
    '{}_{}'.format(metric, cutoff)
    for cutoff in (5, 10, 15)
    for metric in ('PREC', 'REC', 'NDCG')
]

## Cold Start User - 10

### CoSVD

In [9]:
# CoSVDv9 with n_factors=40, cold-start run with n_rating=10 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = CoSVDv9(
    n_factors=40,
    n_epochs=65,
    lr_all=0.0028,
    tags=raw_tags,
    random_state=123,
    verbose=False,
)
start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=10, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm CoSVDv9 on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9155  0.9149  0.9150  0.9148  0.9157  0.9148  0.9157  0.9145  0.9148  0.9155  0.9151  0.0004  
MAE (testset)     0.7100  0.7094  0.7096  0.7093  0.7103  0.7094  0.7105  0.7089  0.7098  0.7096  0.7097  0.0005  
PREC_5 (testset)  0.4834  0.4871  0.4782  0.4832  0.4791  0.4829  0.4832  0.4860  0.4833  0.4804  0.4827  0.0026  
REC_5 (testset)   0.0845  0.0862  0.0836  0.0858  0.0837  0.0852  0.0841  0.0852  0.0851  0.0854  0.0849  0.0008  
NDCG_5 (testset)  0.6903  0.6886  0.6884  0.6892  0.6878  0.6890  0.6883  0.6887  0.6896  0.6882  0.6888  0.0007  
PREC_10 (testset) 0.4762  0.4798  0.4710  0.4757  0.4715  0.4759  0.4760  0.4791  0.4759  0.4731  0.4754  0.0027  
REC_10 (testset)  0.1310  0.1330  0.1294  0.1321  0.1297  0.1

In [10]:
# CoSVDv9 with n_factors=30, cold-start run with n_rating=10 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = CoSVDv9(
    n_factors=30,
    n_epochs=65,
    lr_all=0.0028,
    tags=raw_tags,
    random_state=123,
    verbose=False,
)
start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=10, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm CoSVDv9 on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9139  0.9132  0.9134  0.9130  0.9142  0.9130  0.9139  0.9129  0.9133  0.9138  0.9135  0.0004  
MAE (testset)     0.7086  0.7079  0.7082  0.7078  0.7090  0.7079  0.7089  0.7075  0.7084  0.7081  0.7082  0.0005  
PREC_5 (testset)  0.4827  0.4859  0.4761  0.4809  0.4774  0.4804  0.4809  0.4829  0.4821  0.4788  0.4808  0.0027  
REC_5 (testset)   0.0842  0.0862  0.0829  0.0851  0.0834  0.0846  0.0839  0.0848  0.0849  0.0851  0.0845  0.0009  
NDCG_5 (testset)  0.6929  0.6921  0.6904  0.6909  0.6894  0.6923  0.6910  0.6907  0.6912  0.6914  0.6912  0.0009  
PREC_10 (testset) 0.4750  0.4788  0.4688  0.4736  0.4701  0.4732  0.4741  0.4751  0.4750  0.4715  0.4735  0.0027  
REC_10 (testset)  0.1304  0.1330  0.1288  0.1315  0.1293  0.1

In [11]:
# CoSVDv9 with n_factors=20, cold-start run with n_rating=10 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = CoSVDv9(
    n_factors=20,
    n_epochs=65,
    lr_all=0.0028,
    tags=raw_tags,
    random_state=123,
    verbose=False,
)
start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=10, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm CoSVDv9 on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9114  0.9111  0.9110  0.9108  0.9118  0.9110  0.9117  0.9108  0.9109  0.9115  0.9112  0.0004  
MAE (testset)     0.7065  0.7061  0.7063  0.7059  0.7069  0.7061  0.7070  0.7056  0.7064  0.7062  0.7063  0.0004  
PREC_5 (testset)  0.4796  0.4804  0.4738  0.4799  0.4732  0.4786  0.4774  0.4808  0.4783  0.4753  0.4777  0.0026  
REC_5 (testset)   0.0834  0.0850  0.0826  0.0847  0.0828  0.0842  0.0831  0.0843  0.0840  0.0843  0.0838  0.0008  
NDCG_5 (testset)  0.6972  0.6949  0.6935  0.6948  0.6936  0.6947  0.6947  0.6945  0.6957  0.6948  0.6948  0.0010  
PREC_10 (testset) 0.4716  0.4734  0.4663  0.4728  0.4660  0.4716  0.4704  0.4733  0.4714  0.4681  0.4705  0.0026  
REC_10 (testset)  0.1295  0.1317  0.1283  0.1310  0.1287  0.1

### SVD

In [6]:
# SVD with n_factors=40, cold-start run with n_rating=10 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = SVD(
    n_factors=40,
    n_epochs=60,
    lr_all=0.008,
    reg_all=0.091,
    random_state=123,
    verbose=False,
)
start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=10, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9185  0.9177  0.9178  0.9178  0.9182  0.9171  0.9180  0.9170  0.9175  0.9184  0.9178  0.0005  
MAE (testset)     0.7118  0.7107  0.7111  0.7110  0.7115  0.7106  0.7116  0.7101  0.7109  0.7111  0.7110  0.0005  
PREC_5 (testset)  0.4631  0.4708  0.4590  0.4587  0.4617  0.4624  0.4668  0.4634  0.4683  0.4585  0.4633  0.0040  
REC_5 (testset)   0.0799  0.0827  0.0793  0.0812  0.0801  0.0809  0.0803  0.0805  0.0819  0.0811  0.0808  0.0009  
NDCG_5 (testset)  0.6918  0.6925  0.6908  0.6883  0.6916  0.6919  0.6905  0.6899  0.6941  0.6894  0.6911  0.0016  
PREC_10 (testset) 0.4564  0.4637  0.4525  0.4521  0.4546  0.4557  0.4604  0.4573  0.4604  0.4517  0.4565  0.0038  
REC_10 (testset)  0.1256  0.1290  0.1245  0.1268  0.1255  0.1266 

In [7]:
# SVD with n_factors=30, cold-start run with n_rating=10 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = SVD(
    n_factors=30,
    n_epochs=60,
    lr_all=0.008,
    reg_all=0.091,
    random_state=123,
    verbose=False,
)
start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=10, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9194  0.9185  0.9188  0.9189  0.9192  0.9178  0.9188  0.9178  0.9185  0.9192  0.9187  0.0005  
MAE (testset)     0.7123  0.7111  0.7116  0.7116  0.7121  0.7109  0.7119  0.7105  0.7116  0.7115  0.7115  0.0005  
PREC_5 (testset)  0.4639  0.4712  0.4589  0.4575  0.4611  0.4641  0.4659  0.4637  0.4675  0.4596  0.4633  0.0040  
REC_5 (testset)   0.0802  0.0831  0.0794  0.0808  0.0803  0.0811  0.0806  0.0808  0.0818  0.0814  0.0809  0.0010  
NDCG_5 (testset)  0.6901  0.6931  0.6893  0.6866  0.6901  0.6924  0.6901  0.6896  0.6916  0.6888  0.6902  0.0018  
PREC_10 (testset) 0.4566  0.4643  0.4520  0.4512  0.4539  0.4572  0.4598  0.4573  0.4599  0.4529  0.4565  0.0039  
REC_10 (testset)  0.1259  0.1295  0.1246  0.1266  0.1259  0.1270 

In [8]:
# SVD with n_factors=20, cold-start run with n_rating=10 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = SVD(
    n_factors=20,
    n_epochs=60,
    lr_all=0.008,
    reg_all=0.091,
    random_state=123,
    verbose=False,
)
start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=10, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9199  0.9197  0.9195  0.9197  0.9199  0.9191  0.9197  0.9189  0.9192  0.9198  0.9195  0.0004  
MAE (testset)     0.7123  0.7117  0.7120  0.7120  0.7123  0.7116  0.7124  0.7110  0.7119  0.7117  0.7119  0.0004  
PREC_5 (testset)  0.4630  0.4705  0.4583  0.4597  0.4588  0.4624  0.4660  0.4634  0.4669  0.4590  0.4628  0.0038  
REC_5 (testset)   0.0801  0.0829  0.0796  0.0814  0.0805  0.0812  0.0807  0.0810  0.0819  0.0817  0.0811  0.0009  
NDCG_5 (testset)  0.6901  0.6914  0.6880  0.6876  0.6891  0.6902  0.6883  0.6882  0.6913  0.6880  0.6892  0.0014  
PREC_10 (testset) 0.4558  0.4633  0.4515  0.4529  0.4521  0.4562  0.4593  0.4569  0.4596  0.4524  0.4560  0.0037  
REC_10 (testset)  0.1259  0.1294  0.1250  0.1276  0.1264  0.1275 

### SVD++

In [12]:
# SVDpp with n_factors=40, cold-start run with n_rating=10 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = SVDpp(
    n_factors=40,
    n_epochs=45,
    lr_all=0.0012,
    reg_all=0.0012,
    random_state=123,
    verbose=False,
)

start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=10, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVDpp on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9295  0.9293  0.9308  0.9290  0.9305  0.9300  0.9308  0.9293  0.9297  0.9298  0.9299  0.0006  
MAE (testset)     0.7240  0.7246  0.7256  0.7242  0.7252  0.7248  0.7256  0.7246  0.7253  0.7244  0.7249  0.0005  
PREC_5 (testset)  0.5308  0.5208  0.5158  0.5215  0.5231  0.5263  0.5294  0.5232  0.5210  0.5230  0.5235  0.0042  
REC_5 (testset)   0.0945  0.0924  0.0903  0.0918  0.0921  0.0931  0.0936  0.0920  0.0914  0.0933  0.0924  0.0011  
NDCG_5 (testset)  0.6922  0.6875  0.6868  0.6908  0.6901  0.6906  0.6906  0.6894  0.6900  0.6921  0.6900  0.0017  
PREC_10 (testset) 0.5217  0.5126  0.5081  0.5129  0.5149  0.5178  0.5213  0.5150  0.5126  0.5146  0.5152  0.0039  
REC_10 (testset)  0.1403  0.1377  0.1344  0.1361  0.1373  0.138

In [13]:
# SVDpp with n_factors=30, cold-start run with n_rating=10 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = SVDpp(
    n_factors=30,
    n_epochs=45,
    lr_all=0.0012,
    reg_all=0.0012,
    random_state=123,
    verbose=False,
)

start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=10, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVDpp on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9248  0.9241  0.9254  0.9234  0.9255  0.9244  0.9255  0.9248  0.9250  0.9250  0.9248  0.0006  
MAE (testset)     0.7204  0.7198  0.7207  0.7195  0.7213  0.7199  0.7213  0.7203  0.7210  0.7205  0.7205  0.0006  
PREC_5 (testset)  0.5200  0.5272  0.5146  0.5177  0.5200  0.5206  0.5264  0.5149  0.5180  0.5161  0.5195  0.0041  
REC_5 (testset)   0.0921  0.0940  0.0905  0.0916  0.0909  0.0930  0.0925  0.0912  0.0916  0.0909  0.0918  0.0010  
NDCG_5 (testset)  0.6960  0.6984  0.6942  0.6948  0.6965  0.6945  0.6952  0.6900  0.6957  0.6960  0.6951  0.0021  
PREC_10 (testset) 0.5120  0.5182  0.5067  0.5101  0.5115  0.5122  0.5178  0.5071  0.5099  0.5080  0.5113  0.0038  
REC_10 (testset)  0.1374  0.1396  0.1353  0.1368  0.1357  0.139

In [15]:
# SVDpp with n_factors=20, cold-start run with n_rating=10 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = SVDpp(
    n_factors=20,
    n_epochs=45,
    lr_all=0.0012,
    reg_all=0.0012,
    random_state=123,
    verbose=False,
)

start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=10, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVDpp on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9189  0.9189  0.9199  0.9184  0.9200  0.9197  0.9199  0.9196  0.9187  0.9191  0.9193  0.0005  
MAE (testset)     0.7154  0.7155  0.7165  0.7151  0.7164  0.7164  0.7170  0.7162  0.7161  0.7155  0.7160  0.0006  
PREC_5 (testset)  0.5203  0.5239  0.5111  0.5138  0.5177  0.5165  0.5163  0.5159  0.5184  0.5126  0.5167  0.0036  
REC_5 (testset)   0.0914  0.0927  0.0894  0.0910  0.0907  0.0911  0.0896  0.0901  0.0907  0.0904  0.0907  0.0009  
NDCG_5 (testset)  0.7052  0.7053  0.7024  0.7010  0.7027  0.7038  0.7014  0.6999  0.7027  0.7022  0.7026  0.0017  
PREC_10 (testset) 0.5114  0.5154  0.5032  0.5057  0.5090  0.5077  0.5077  0.5076  0.5101  0.5044  0.5082  0.0034  
REC_10 (testset)  0.1364  0.1382  0.1335  0.1358  0.1355  0.136

### NMF

In [16]:
# NMF with n_factors=40, cold-start run with n_rating=10 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = NMF(
    n_factors=40,
    n_epochs=40,
    reg_pu=0.19,
    reg_qi=0.08,
    lr_bu=0.001,
    lr_bi=0.001,
    reg_bu=0.001,
    reg_bi=0.001,
    random_state=123,
    verbose=False,
)

start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=10, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm NMF on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9419  0.9415  0.9412  0.9414  0.9429  0.9416  0.9426  0.9397  0.9421  0.9412  0.9416  0.0008  
MAE (testset)     0.7353  0.7347  0.7344  0.7345  0.7361  0.7348  0.7360  0.7332  0.7354  0.7341  0.7349  0.0008  
PREC_5 (testset)  0.4502  0.4542  0.4508  0.4535  0.4493  0.4548  0.4532  0.4554  0.4524  0.4506  0.4524  0.0020  
REC_5 (testset)   0.0752  0.0767  0.0756  0.0771  0.0753  0.0766  0.0753  0.0765  0.0761  0.0768  0.0761  0.0007  
NDCG_5 (testset)  0.6767  0.6764  0.6766  0.6794  0.6770  0.6789  0.6784  0.6799  0.6774  0.6782  0.6779  0.0012  
PREC_10 (testset) 0.4445  0.4487  0.4447  0.4473  0.4435  0.4482  0.4476  0.4493  0.4469  0.4452  0.4466  0.0019  
REC_10 (testset)  0.1171  0.1190  0.1173  0.1191  0.1170  0.1185 

In [17]:
# NMF with n_factors=30, cold-start run with n_rating=10 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = NMF(
    n_factors=30,
    n_epochs=40,
    reg_pu=0.19,
    reg_qi=0.08,
    lr_bu=0.001,
    lr_bi=0.001,
    reg_bu=0.001,
    reg_bi=0.001,
    random_state=123,
    verbose=False,
)

start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=10, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm NMF on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9490  0.9490  0.9489  0.9483  0.9498  0.9486  0.9497  0.9468  0.9496  0.9488  0.9489  0.0008  
MAE (testset)     0.7421  0.7418  0.7420  0.7413  0.7427  0.7416  0.7428  0.7401  0.7425  0.7415  0.7418  0.0007  
PREC_5 (testset)  0.4509  0.4538  0.4509  0.4514  0.4503  0.4530  0.4542  0.4558  0.4537  0.4483  0.4522  0.0021  
REC_5 (testset)   0.0743  0.0755  0.0745  0.0760  0.0747  0.0754  0.0750  0.0753  0.0756  0.0752  0.0752  0.0005  
NDCG_5 (testset)  0.6724  0.6722  0.6734  0.6730  0.6736  0.6742  0.6728  0.6736  0.6737  0.6723  0.6731  0.0006  
PREC_10 (testset) 0.4457  0.4483  0.4451  0.4457  0.4449  0.4470  0.4486  0.4498  0.4479  0.4430  0.4466  0.0020  
REC_10 (testset)  0.1150  0.1164  0.1149  0.1170  0.1154  0.1159 

In [18]:
# NMF with n_factors=20, cold-start run with n_rating=10 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = NMF(
    n_factors=20,
    n_epochs=40,
    reg_pu=0.19,
    reg_qi=0.08,
    lr_bu=0.001,
    lr_bi=0.001,
    reg_bu=0.001,
    reg_bi=0.001,
    random_state=123,
    verbose=False,
)

start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=10, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm NMF on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9613  0.9602  0.9597  0.9605  0.9616  0.9599  0.9610  0.9588  0.9613  0.9604  0.9605  0.0008  
MAE (testset)     0.7535  0.7523  0.7519  0.7528  0.7538  0.7522  0.7533  0.7512  0.7535  0.7523  0.7527  0.0008  
PREC_5 (testset)  0.4515  0.4554  0.4492  0.4487  0.4482  0.4507  0.4522  0.4527  0.4519  0.4474  0.4508  0.0023  
REC_5 (testset)   0.0729  0.0749  0.0735  0.0739  0.0727  0.0737  0.0736  0.0735  0.0738  0.0737  0.0736  0.0006  
NDCG_5 (testset)  0.6660  0.6690  0.6686  0.6668  0.6667  0.6673  0.6667  0.6672  0.6669  0.6654  0.6671  0.0010  
PREC_10 (testset) 0.4459  0.4497  0.4437  0.4436  0.4435  0.4452  0.4471  0.4473  0.4468  0.4419  0.4455  0.0022  
REC_10 (testset)  0.1118  0.1145  0.1126  0.1131  0.1117  0.1128 

## Cold Start User - 15

### CoSVD

In [19]:
# CoSVDv9 with n_factors=40, cold-start run with n_rating=15 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = CoSVDv9(
    n_factors=40,
    n_epochs=65,
    lr_all=0.0028,
    tags=raw_tags,
    random_state=123,
    verbose=False,
)
start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=15, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm CoSVDv9 on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9050  0.9052  0.9052  0.9047  0.9053  0.9053  0.9057  0.9052  0.9051  0.9051  0.9052  0.0003  
MAE (testset)     0.7011  0.7007  0.7012  0.7007  0.7010  0.7013  0.7017  0.7007  0.7011  0.7010  0.7011  0.0003  
PREC_5 (testset)  0.4656  0.4684  0.4615  0.4644  0.4635  0.4702  0.4632  0.4674  0.4644  0.4606  0.4649  0.0029  
REC_5 (testset)   0.0976  0.0990  0.0961  0.0988  0.0977  0.1001  0.0968  0.0978  0.0975  0.0969  0.0978  0.0011  
NDCG_5 (testset)  0.7050  0.7050  0.7035  0.7040  0.7038  0.7046  0.7027  0.7041  0.7041  0.7030  0.7040  0.0007  
PREC_10 (testset) 0.4582  0.4619  0.4551  0.4581  0.4569  0.4636  0.4567  0.4609  0.4579  0.4543  0.4584  0.0028  
REC_10 (testset)  0.1383  0.1404  0.1372  0.1402  0.1391  0.1

In [20]:
# CoSVDv9 with n_factors=30, cold-start run with n_rating=15 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = CoSVDv9(
    n_factors=30,
    n_epochs=65,
    lr_all=0.0028,
    tags=raw_tags,
    random_state=123,
    verbose=False,
)
start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=15, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm CoSVDv9 on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9037  0.9035  0.9033  0.9031  0.9039  0.9031  0.9038  0.9033  0.9033  0.9038  0.9035  0.0003  
MAE (testset)     0.6999  0.6994  0.6994  0.6993  0.6998  0.6993  0.7000  0.6991  0.6996  0.6999  0.6996  0.0003  
PREC_5 (testset)  0.4624  0.4659  0.4590  0.4598  0.4611  0.4689  0.4603  0.4631  0.4626  0.4569  0.4620  0.0033  
REC_5 (testset)   0.0967  0.0978  0.0961  0.0981  0.0976  0.0998  0.0964  0.0972  0.0975  0.0961  0.0973  0.0011  
NDCG_5 (testset)  0.7070  0.7064  0.7047  0.7049  0.7055  0.7076  0.7055  0.7057  0.7063  0.7048  0.7058  0.0009  
PREC_10 (testset) 0.4558  0.4591  0.4527  0.4535  0.4541  0.4620  0.4537  0.4564  0.4560  0.4504  0.4554  0.0031  
REC_10 (testset)  0.1374  0.1390  0.1370  0.1393  0.1386  0.1

In [21]:
# CoSVDv9 with n_factors=20, cold-start run with n_rating=15 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = CoSVDv9(
    n_factors=20,
    n_epochs=65,
    lr_all=0.0028,
    tags=raw_tags,
    random_state=123,
    verbose=False,
)
start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=15, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm CoSVDv9 on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9015  0.9009  0.9014  0.9009  0.9015  0.9013  0.9015  0.9014  0.9009  0.9014  0.9012  0.0002  
MAE (testset)     0.6980  0.6971  0.6978  0.6974  0.6978  0.6978  0.6980  0.6974  0.6976  0.6978  0.6977  0.0003  
PREC_5 (testset)  0.4577  0.4630  0.4554  0.4587  0.4575  0.4630  0.4566  0.4609  0.4588  0.4552  0.4587  0.0027  
REC_5 (testset)   0.0956  0.0973  0.0948  0.0978  0.0966  0.0987  0.0957  0.0961  0.0963  0.0959  0.0965  0.0011  
NDCG_5 (testset)  0.7095  0.7100  0.7079  0.7084  0.7090  0.7091  0.7090  0.7083  0.7099  0.7079  0.7089  0.0007  
PREC_10 (testset) 0.4510  0.4564  0.4490  0.4520  0.4511  0.4568  0.4503  0.4548  0.4523  0.4491  0.4523  0.0027  
REC_10 (testset)  0.1362  0.1385  0.1355  0.1389  0.1377  0.1

### SVD

In [22]:
# SVD with n_factors=40, cold-start run with n_rating=15 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = SVD(
    n_factors=40,
    n_epochs=60,
    lr_all=0.008,
    reg_all=0.091,
    random_state=123,
    verbose=False,
)
start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=15, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9034  0.9035  0.9036  0.9031  0.9032  0.9033  0.9037  0.9034  0.9030  0.9034  0.9034  0.0002  
MAE (testset)     0.6994  0.6990  0.6995  0.6991  0.6989  0.6994  0.6997  0.6989  0.6990  0.6993  0.6992  0.0003  
PREC_5 (testset)  0.4456  0.4545  0.4435  0.4460  0.4445  0.4482  0.4461  0.4479  0.4474  0.4412  0.4465  0.0033  
REC_5 (testset)   0.0922  0.0944  0.0913  0.0941  0.0934  0.0944  0.0918  0.0929  0.0935  0.0921  0.0930  0.0011  
NDCG_5 (testset)  0.7107  0.7133  0.7107  0.7102  0.7103  0.7105  0.7089  0.7106  0.7128  0.7089  0.7107  0.0014  
PREC_10 (testset) 0.4387  0.4478  0.4367  0.4396  0.4379  0.4416  0.4401  0.4417  0.4406  0.4350  0.4400  0.0033  
REC_10 (testset)  0.1316  0.1344  0.1307  0.1342  0.1334  0.1344 

In [23]:
# SVD with n_factors=30, cold-start run with n_rating=15 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = SVD(
    n_factors=30,
    n_epochs=60,
    lr_all=0.008,
    reg_all=0.091,
    random_state=123,
    verbose=False,
)
start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=15, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9047  0.9044  0.9041  0.9044  0.9045  0.9032  0.9045  0.9040  0.9038  0.9048  0.9042  0.0005  
MAE (testset)     0.7003  0.6996  0.6997  0.6999  0.6997  0.6990  0.7000  0.6992  0.6994  0.7002  0.6997  0.0004  
PREC_5 (testset)  0.4459  0.4526  0.4438  0.4434  0.4430  0.4507  0.4471  0.4485  0.4481  0.4413  0.4464  0.0034  
REC_5 (testset)   0.0923  0.0940  0.0920  0.0941  0.0932  0.0945  0.0922  0.0933  0.0938  0.0921  0.0931  0.0009  
NDCG_5 (testset)  0.7090  0.7111  0.7083  0.7072  0.7083  0.7097  0.7075  0.7094  0.7104  0.7079  0.7089  0.0012  
PREC_10 (testset) 0.4395  0.4463  0.4375  0.4373  0.4370  0.4439  0.4412  0.4426  0.4415  0.4352  0.4402  0.0033  
REC_10 (testset)  0.1316  0.1342  0.1316  0.1343  0.1334  0.1346 

In [24]:
# SVD with n_factors=20, cold-start run with n_rating=15 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = SVD(
    n_factors=20,
    n_epochs=60,
    lr_all=0.008,
    reg_all=0.091,
    random_state=123,
    verbose=False,
)
start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=15, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9058  0.9049  0.9055  0.9055  0.9049  0.9054  0.9052  0.9053  0.9042  0.9055  0.9052  0.0004  
MAE (testset)     0.7010  0.6996  0.7005  0.7005  0.6998  0.7007  0.7004  0.7000  0.6995  0.7005  0.7003  0.0005  
PREC_5 (testset)  0.4424  0.4549  0.4416  0.4448  0.4421  0.4464  0.4442  0.4471  0.4451  0.4397  0.4448  0.0040  
REC_5 (testset)   0.0917  0.0947  0.0914  0.0948  0.0932  0.0944  0.0919  0.0933  0.0933  0.0927  0.0931  0.0012  
NDCG_5 (testset)  0.7068  0.7116  0.7063  0.7069  0.7073  0.7071  0.7064  0.7081  0.7094  0.7058  0.7076  0.0017  
PREC_10 (testset) 0.4365  0.4479  0.4358  0.4381  0.4363  0.4404  0.4389  0.4412  0.4389  0.4339  0.4388  0.0037  
REC_10 (testset)  0.1314  0.1348  0.1311  0.1349  0.1335  0.1343 

### SVD++

In [25]:
# SVDpp with n_factors=40, cold-start run with n_rating=15 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = SVDpp(
    n_factors=40,
    n_epochs=45,
    lr_all=0.0012,
    reg_all=0.0012,
    random_state=123,
    verbose=False,
)

start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=15, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVDpp on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9088  0.9082  0.9088  0.9081  0.9093  0.9085  0.9092  0.9080  0.9085  0.9076  0.9085  0.0005  
MAE (testset)     0.7074  0.7070  0.7074  0.7071  0.7078  0.7075  0.7078  0.7067  0.7076  0.7066  0.7073  0.0004  
PREC_5 (testset)  0.5239  0.5259  0.5209  0.5209  0.5200  0.5289  0.5270  0.5248  0.5205  0.5203  0.5233  0.0030  
REC_5 (testset)   0.1082  0.1102  0.1068  0.1089  0.1087  0.1103  0.1099  0.1090  0.1077  0.1070  0.1087  0.0012  
NDCG_5 (testset)  0.7136  0.7155  0.7144  0.7142  0.7131  0.7168  0.7147  0.7146  0.7142  0.7163  0.7147  0.0011  
PREC_10 (testset) 0.5155  0.5180  0.5124  0.5133  0.5123  0.5203  0.5190  0.5167  0.5126  0.5123  0.5152  0.0029  
REC_10 (testset)  0.1492  0.1517  0.1477  0.1503  0.1501  0.151

In [26]:
# SVDpp with n_factors=30, cold-start run with n_rating=15 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = SVDpp(
    n_factors=30,
    n_epochs=45,
    lr_all=0.0012,
    reg_all=0.0012,
    random_state=123,
    verbose=False,
)

start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=15, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVDpp on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9035  0.9040  0.9037  0.9028  0.9044  0.9034  0.9040  0.9029  0.9030  0.9031  0.9035  0.0005  
MAE (testset)     0.7033  0.7033  0.7030  0.7027  0.7038  0.7032  0.7037  0.7026  0.7029  0.7028  0.7031  0.0004  
PREC_5 (testset)  0.5163  0.5211  0.5151  0.5155  0.5147  0.5262  0.5213  0.5203  0.5178  0.5122  0.5181  0.0040  
REC_5 (testset)   0.1054  0.1086  0.1057  0.1077  0.1060  0.1096  0.1083  0.1071  0.1070  0.1057  0.1071  0.0014  
NDCG_5 (testset)  0.7208  0.7192  0.7182  0.7194  0.7196  0.7220  0.7201  0.7194  0.7209  0.7183  0.7198  0.0011  
PREC_10 (testset) 0.5082  0.5127  0.5076  0.5074  0.5065  0.5178  0.5129  0.5126  0.5099  0.5041  0.5100  0.0038  
REC_10 (testset)  0.1455  0.1499  0.1463  0.1488  0.1465  0.150

In [27]:
# SVDpp with n_factors=20, cold-start run with n_rating=15 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = SVDpp(
    n_factors=20,
    n_epochs=45,
    lr_all=0.0012,
    reg_all=0.0012,
    random_state=123,
    verbose=False,
)

start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=15, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVDpp on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.8978  0.8975  0.8985  0.8976  0.8983  0.8988  0.8986  0.8970  0.8976  0.8980  0.8980  0.0005  
MAE (testset)     0.6986  0.6981  0.6987  0.6985  0.6988  0.6994  0.6993  0.6978  0.6986  0.6986  0.6986  0.0005  
PREC_5 (testset)  0.5092  0.5192  0.5115  0.5113  0.5119  0.5200  0.5161  0.5176  0.5126  0.5079  0.5137  0.0040  
REC_5 (testset)   0.1047  0.1071  0.1039  0.1062  0.1048  0.1072  0.1058  0.1064  0.1051  0.1041  0.1055  0.0011  
NDCG_5 (testset)  0.7254  0.7274  0.7252  0.7259  0.7266  0.7259  0.7262  0.7267  0.7269  0.7263  0.7263  0.0007  
PREC_10 (testset) 0.5016  0.5112  0.5034  0.5033  0.5039  0.5118  0.5078  0.5095  0.5043  0.4998  0.5057  0.0039  
REC_10 (testset)  0.1450  0.1480  0.1442  0.1466  0.1454  0.148

### NMF

In [28]:
# NMF with n_factors=40, cold-start run with n_rating=15 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = NMF(
    n_factors=40,
    n_epochs=40,
    reg_pu=0.19,
    reg_qi=0.08,
    lr_bu=0.001,
    lr_bi=0.001,
    reg_bu=0.001,
    reg_bi=0.001,
    random_state=123,
    verbose=False,
)

start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=15, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm NMF on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9152  0.9152  0.9156  0.9146  0.9160  0.9155  0.9154  0.9147  0.9162  0.9159  0.9154  0.0005  
MAE (testset)     0.7120  0.7115  0.7119  0.7114  0.7123  0.7120  0.7119  0.7111  0.7126  0.7125  0.7119  0.0005  
PREC_5 (testset)  0.4372  0.4418  0.4378  0.4399  0.4387  0.4430  0.4418  0.4405  0.4374  0.4358  0.4394  0.0023  
REC_5 (testset)   0.0880  0.0901  0.0889  0.0908  0.0894  0.0913  0.0890  0.0891  0.0889  0.0884  0.0894  0.0010  
NDCG_5 (testset)  0.7006  0.7022  0.7015  0.7032  0.7005  0.7008  0.7026  0.7002  0.7018  0.6995  0.7013  0.0011  
PREC_10 (testset) 0.4317  0.4361  0.4316  0.4344  0.4330  0.4371  0.4360  0.4351  0.4311  0.4306  0.4337  0.0023  
REC_10 (testset)  0.1254  0.1282  0.1270  0.1291  0.1275  0.1296 

In [29]:
# NMF with n_factors=30, cold-start run with n_rating=15 per user;
# the printed value is the elapsed wall-clock time in seconds.
algo = NMF(
    n_factors=30,
    n_epochs=40,
    reg_pu=0.19,
    reg_qi=0.08,
    lr_bu=0.001,
    lr_bi=0.001,
    reg_bu=0.001,
    reg_bi=0.001,
    random_state=123,
    verbose=False,
)

start = time.time()
parellel_cold_start(
    algo, data, rate, m_list,
    n_rating=15, n_exp=10, n_jobs=5, random_state=123,
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm NMF on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9207  0.9206  0.9216  0.9207  0.9216  0.9208  0.9218  0.9193  0.9211  0.9212  0.9209  0.0007  
MAE (testset)     0.7180  0.7174  0.7183  0.7178  0.7182  0.7179  0.7187  0.7162  0.7180  0.7181  0.7178  0.0006  
PREC_5 (testset)  0.4339  0.4378  0.4339  0.4356  0.4359  0.4394  0.4357  0.4369  0.4337  0.4323  0.4355  0.0020  
REC_5 (testset)   0.0856  0.0873  0.0862  0.0879  0.0866  0.0886  0.0863  0.0867  0.0867  0.0859  0.0868  0.0009  
NDCG_5 (testset)  0.6970  0.6971  0.6973  0.6962  0.6966  0.6992  0.6974  0.6981  0.6970  0.6946  0.6971  0.0011  
PREC_10 (testset) 0.4284  0.4324  0.4284  0.4305  0.4304  0.4341  0.4299  0.4317  0.4279  0.4270  0.4301  0.0021  
REC_10 (testset)  0.1216  0.1239  0.1226  0.1248  0.1232  0.1253 

In [30]:
# NMF with 20 latent factors, evaluated by parellel_cold_start (defined in an
# earlier cell) with n_rating=15, n_exp=10 and n_jobs=5.
algo = NMF(verbose=False, n_epochs=40, reg_pu=0.19, reg_qi=0.08, lr_bu=0.001, lr_bi=0.001, reg_bu=0.001, reg_bi=0.001, n_factors=20, random_state=123)

start = time.time()
parellel_cold_start(algo, data, rate, m_list, random_state=123, n_rating=15, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm NMF on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9290  0.9286  0.9290  0.9289  0.9303  0.9297  0.9301  0.9284  0.9298  0.9294  0.9293  0.0006  
MAE (testset)     0.7265  0.7258  0.7263  0.7264  0.7275  0.7271  0.7275  0.7256  0.7271  0.7269  0.7267  0.0006  
PREC_5 (testset)  0.4271  0.4318  0.4274  0.4297  0.4279  0.4326  0.4307  0.4296  0.4265  0.4273  0.4291  0.0020  
REC_5 (testset)   0.0823  0.0836  0.0827  0.0837  0.0830  0.0848  0.0825  0.0823  0.0827  0.0823  0.0830  0.0008  
NDCG_5 (testset)  0.6921  0.6926  0.6925  0.6911  0.6904  0.6927  0.6922  0.6918  0.6923  0.6906  0.6918  0.0008  
PREC_10 (testset) 0.4223  0.4270  0.4227  0.4250  0.4229  0.4275  0.4255  0.4249  0.4215  0.4225  0.4242  0.0020  
REC_10 (testset)  0.1163  0.1183  0.1172  0.1181  0.1172  0.1196 

## Cold Start User - 20

### CoSVD

In [31]:
# User cold-start (20) section: CoSVDv9 with 40 latent factors, fed the tag
# table loaded from tags.csv (raw_tags); n_rating=20, n_exp=10, n_jobs=5.
algo = CoSVDv9(verbose=False, n_epochs=65, lr_all=0.0028, n_factors=40, tags=raw_tags, random_state=123)
start = time.time()
parellel_cold_start(algo, data, rate, m_list, random_state=123, n_rating=20, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm CoSVDv9 on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.8964  0.8971  0.8971  0.8972  0.8978  0.8976  0.8971  0.8973  0.8974  0.8973  0.8972  0.0004  
MAE (testset)     0.6940  0.6939  0.6942  0.6944  0.6946  0.6947  0.6944  0.6940  0.6945  0.6944  0.6943  0.0003  
PREC_5 (testset)  0.4499  0.4492  0.4480  0.4473  0.4454  0.4522  0.4456  0.4502  0.4474  0.4435  0.4479  0.0025  
REC_5 (testset)   0.1073  0.1064  0.1057  0.1070  0.1059  0.1074  0.1046  0.1053  0.1061  0.1032  0.1059  0.0012  
NDCG_5 (testset)  0.7245  0.7216  0.7221  0.7216  0.7211  0.7223  0.7202  0.7229  0.7223  0.7210  0.7220  0.0011  
PREC_10 (testset) 0.4435  0.4428  0.4412  0.4410  0.4393  0.4458  0.4396  0.4437  0.4410  0.4372  0.4415  0.0024  
REC_10 (testset)  0.1453  0.1449  0.1435  0.1448  0.1439  0.1

In [32]:
# User cold-start (20) section: CoSVDv9 with 30 latent factors, fed the tag
# table loaded from tags.csv (raw_tags); n_rating=20, n_exp=10, n_jobs=5.
algo = CoSVDv9(verbose=False, n_epochs=65, lr_all=0.0028, n_factors=30, tags=raw_tags, random_state=123)
start = time.time()
parellel_cold_start(algo, data, rate, m_list, random_state=123, n_rating=20, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm CoSVDv9 on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.8950  0.8955  0.8961  0.8956  0.8955  0.8955  0.8958  0.8953  0.8955  0.8954  0.8955  0.0003  
MAE (testset)     0.6926  0.6925  0.6933  0.6931  0.6928  0.6929  0.6931  0.6923  0.6929  0.6927  0.6928  0.0003  
PREC_5 (testset)  0.4478  0.4473  0.4453  0.4421  0.4470  0.4521  0.4446  0.4473  0.4442  0.4423  0.4460  0.0028  
REC_5 (testset)   0.1060  0.1058  0.1047  0.1059  0.1061  0.1075  0.1043  0.1046  0.1054  0.1036  0.1054  0.0011  
NDCG_5 (testset)  0.7250  0.7237  0.7231  0.7232  0.7249  0.7253  0.7211  0.7242  0.7244  0.7235  0.7238  0.0012  
PREC_10 (testset) 0.4414  0.4407  0.4383  0.4358  0.4402  0.4449  0.4385  0.4402  0.4375  0.4360  0.4393  0.0026  
REC_10 (testset)  0.1439  0.1440  0.1423  0.1435  0.1439  0.1

In [33]:
# User cold-start (20) section: CoSVDv9 with 20 latent factors, fed the tag
# table loaded from tags.csv (raw_tags); n_rating=20, n_exp=10, n_jobs=5.
algo = CoSVDv9(verbose=False, n_epochs=65, lr_all=0.0028, n_factors=20, tags=raw_tags, random_state=123)
start = time.time()
parellel_cold_start(algo, data, rate, m_list, random_state=123, n_rating=20, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm CoSVDv9 on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.8929  0.8928  0.8933  0.8927  0.8931  0.8933  0.8930  0.8928  0.8928  0.8933  0.8930  0.0002  
MAE (testset)     0.6909  0.6903  0.6910  0.6907  0.6907  0.6910  0.6909  0.6902  0.6907  0.6908  0.6907  0.0003  
PREC_5 (testset)  0.4445  0.4473  0.4402  0.4423  0.4432  0.4484  0.4433  0.4447  0.4423  0.4365  0.4433  0.0032  
REC_5 (testset)   0.1056  0.1058  0.1036  0.1060  0.1052  0.1069  0.1042  0.1039  0.1047  0.1026  0.1049  0.0012  
NDCG_5 (testset)  0.7272  0.7275  0.7261  0.7265  0.7277  0.7275  0.7263  0.7274  0.7273  0.7253  0.7269  0.0007  
PREC_10 (testset) 0.4380  0.4405  0.4339  0.4360  0.4368  0.4418  0.4366  0.4382  0.4359  0.4308  0.4368  0.0030  
REC_10 (testset)  0.1433  0.1437  0.1411  0.1436  0.1428  0.1

### SVD

In [34]:
# User cold-start (20) section: SVD baseline with 40 latent factors;
# n_rating=20, n_exp=10, n_jobs=5.
algo = SVD(verbose=False, n_epochs=60, lr_all=0.008, reg_all=0.091, n_factors=40, random_state=123)
start = time.time()
parellel_cold_start(algo, data, rate, m_list, random_state=123, n_rating=20, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.8903  0.8908  0.8910  0.8913  0.8912  0.8911  0.8905  0.8906  0.8909  0.8908  0.8908  0.0003  
MAE (testset)     0.6889  0.6887  0.6892  0.6896  0.6892  0.6895  0.6889  0.6885  0.6891  0.6890  0.6890  0.0003  
PREC_5 (testset)  0.4356  0.4346  0.4308  0.4314  0.4311  0.4337  0.4355  0.4359  0.4319  0.4275  0.4328  0.0026  
REC_5 (testset)   0.1019  0.1012  0.1000  0.1017  0.1005  0.1016  0.0998  0.1008  0.1007  0.0990  0.1007  0.0009  
NDCG_5 (testset)  0.7350  0.7334  0.7341  0.7326  0.7330  0.7322  0.7327  0.7347  0.7350  0.7326  0.7335  0.0010  
PREC_10 (testset) 0.4289  0.4280  0.4240  0.4248  0.4246  0.4272  0.4293  0.4294  0.4249  0.4215  0.4263  0.0025  
REC_10 (testset)  0.1382  0.1379  0.1363  0.1381  0.1368  0.1383 

In [35]:
# User cold-start (20) section: SVD baseline with 30 latent factors;
# n_rating=20, n_exp=10, n_jobs=5.
algo = SVD(verbose=False, n_epochs=60, lr_all=0.008, reg_all=0.091, n_factors=30, random_state=123)
start = time.time()
parellel_cold_start(algo, data, rate, m_list, random_state=123, n_rating=20, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.8914  0.8917  0.8925  0.8929  0.8920  0.8912  0.8916  0.8913  0.8921  0.8923  0.8919  0.0005  
MAE (testset)     0.6895  0.6893  0.6902  0.6907  0.6897  0.6893  0.6895  0.6889  0.6898  0.6900  0.6897  0.0005  
PREC_5 (testset)  0.4353  0.4354  0.4312  0.4280  0.4330  0.4345  0.4346  0.4338  0.4306  0.4285  0.4325  0.0026  
REC_5 (testset)   0.1014  0.1013  0.1002  0.1012  0.1014  0.1019  0.1001  0.1010  0.1010  0.0993  0.1009  0.0007  
NDCG_5 (testset)  0.7329  0.7329  0.7319  0.7294  0.7333  0.7317  0.7301  0.7327  0.7318  0.7310  0.7318  0.0012  
PREC_10 (testset) 0.4290  0.4286  0.4243  0.4222  0.4267  0.4277  0.4287  0.4272  0.4243  0.4219  0.4261  0.0025  
REC_10 (testset)  0.1378  0.1382  0.1363  0.1374  0.1377  0.1386 

In [36]:
# User cold-start (20) section: SVD baseline with 20 latent factors;
# n_rating=20, n_exp=10, n_jobs=5.
algo = SVD(verbose=False, n_epochs=60, lr_all=0.008, reg_all=0.091, n_factors=20, random_state=123)
start = time.time()
parellel_cold_start(algo, data, rate, m_list, random_state=123, n_rating=20, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.8932  0.8917  0.8925  0.8923  0.8922  0.8922  0.8924  0.8918  0.8918  0.8923  0.8922  0.0004  
MAE (testset)     0.6908  0.6890  0.6901  0.6899  0.6897  0.6899  0.6900  0.6890  0.6894  0.6896  0.6897  0.0005  
PREC_5 (testset)  0.4319  0.4357  0.4287  0.4295  0.4300  0.4337  0.4340  0.4342  0.4309  0.4264  0.4315  0.0028  
REC_5 (testset)   0.1013  0.1019  0.0998  0.1020  0.1011  0.1021  0.0999  0.1008  0.1009  0.0994  0.1009  0.0009  
NDCG_5 (testset)  0.7297  0.7326  0.7300  0.7295  0.7313  0.7304  0.7291  0.7322  0.7321  0.7292  0.7306  0.0013  
PREC_10 (testset) 0.4259  0.4293  0.4225  0.4234  0.4238  0.4278  0.4280  0.4278  0.4242  0.4203  0.4253  0.0027  
REC_10 (testset)  0.1378  0.1389  0.1360  0.1385  0.1376  0.1391 

### SVD++

In [37]:
# User cold-start (20) section: SVD++ baseline with 40 latent factors;
# n_rating=20, n_exp=10, n_jobs=5.
algo = SVDpp(verbose=False, n_epochs=45, lr_all=0.0012, reg_all=0.0012, n_factors=40, random_state=123)

start = time.time()
parellel_cold_start(algo, data, rate, m_list, random_state=123, n_rating=20, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVDpp on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.8920  0.8925  0.8937  0.8928  0.8939  0.8941  0.8931  0.8931  0.8930  0.8928  0.8931  0.0006  
MAE (testset)     0.6936  0.6936  0.6945  0.6943  0.6948  0.6951  0.6944  0.6941  0.6942  0.6939  0.6942  0.0005  
PREC_5 (testset)  0.5111  0.5121  0.5098  0.5091  0.5052  0.5135  0.5114  0.5133  0.5095  0.5043  0.5099  0.0030  
REC_5 (testset)   0.1181  0.1194  0.1177  0.1193  0.1171  0.1194  0.1184  0.1182  0.1183  0.1156  0.1182  0.0011  
NDCG_5 (testset)  0.7378  0.7371  0.7354  0.7372  0.7363  0.7375  0.7358  0.7367  0.7365  0.7362  0.7367  0.0007  
PREC_10 (testset) 0.5028  0.5040  0.5012  0.5007  0.4969  0.5052  0.5031  0.5046  0.5011  0.4962  0.5016  0.0029  
REC_10 (testset)  0.1575  0.1594  0.1569  0.1585  0.1561  0.159

In [38]:
# User cold-start (20) section: SVD++ baseline with 30 latent factors;
# n_rating=20, n_exp=10, n_jobs=5.
algo = SVDpp(verbose=False, n_epochs=45, lr_all=0.0012, reg_all=0.0012, n_factors=30, random_state=123)

start = time.time()
parellel_cold_start(algo, data, rate, m_list, random_state=123, n_rating=20, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVDpp on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.8870  0.8880  0.8891  0.8875  0.8888  0.8885  0.8883  0.8876  0.8883  0.8879  0.8881  0.0006  
MAE (testset)     0.6894  0.6899  0.6909  0.6900  0.6906  0.6906  0.6904  0.6896  0.6904  0.6899  0.6902  0.0004  
PREC_5 (testset)  0.5086  0.5100  0.5066  0.5027  0.5066  0.5114  0.5093  0.5089  0.5070  0.5012  0.5072  0.0030  
REC_5 (testset)   0.1169  0.1189  0.1167  0.1169  0.1167  0.1188  0.1176  0.1160  0.1167  0.1144  0.1170  0.0012  
NDCG_5 (testset)  0.7428  0.7420  0.7408  0.7416  0.7415  0.7432  0.7404  0.7422  0.7427  0.7416  0.7419  0.0009  
PREC_10 (testset) 0.4998  0.5014  0.4980  0.4946  0.4984  0.5030  0.5010  0.5000  0.4982  0.4931  0.4987  0.0029  
REC_10 (testset)  0.1560  0.1584  0.1557  0.1555  0.1555  0.158

In [39]:
# User cold-start (20) section: SVD++ baseline with 20 latent factors;
# n_rating=20, n_exp=10, n_jobs=5.
algo = SVDpp(verbose=False, n_epochs=45, lr_all=0.0012, reg_all=0.0012, n_factors=20, random_state=123)

start = time.time()
parellel_cold_start(algo, data, rate, m_list, random_state=123, n_rating=20, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVDpp on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.8820  0.8820  0.8834  0.8820  0.8830  0.8828  0.8826  0.8823  0.8828  0.8823  0.8825  0.0005  
MAE (testset)     0.6853  0.6852  0.6863  0.6855  0.6861  0.6859  0.6860  0.6852  0.6861  0.6854  0.6857  0.0004  
PREC_5 (testset)  0.5028  0.5042  0.5008  0.4978  0.5004  0.5062  0.5029  0.5033  0.5032  0.4993  0.5021  0.0024  
REC_5 (testset)   0.1151  0.1166  0.1142  0.1152  0.1155  0.1174  0.1152  0.1148  0.1156  0.1137  0.1153  0.0010  
NDCG_5 (testset)  0.7478  0.7483  0.7478  0.7476  0.7482  0.7463  0.7469  0.7474  0.7496  0.7472  0.7477  0.0008  
PREC_10 (testset) 0.4944  0.4955  0.4926  0.4895  0.4925  0.4984  0.4944  0.4950  0.4946  0.4910  0.4938  0.0024  
REC_10 (testset)  0.1538  0.1556  0.1528  0.1537  0.1544  0.156

### NMF

In [40]:
# User cold-start (20) section: NMF baseline with 40 latent factors;
# n_rating=20, n_exp=10, n_jobs=5.
algo = NMF(verbose=False, n_epochs=40, reg_pu=0.19, reg_qi=0.08, lr_bu=0.001, lr_bi=0.001, reg_bu=0.001, reg_bi=0.001, n_factors=40, random_state=123)

start = time.time()
parellel_cold_start(algo, data, rate, m_list, random_state=123, n_rating=20, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm NMF on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.8997  0.8998  0.9014  0.9001  0.9007  0.9008  0.9010  0.9001  0.9013  0.9004  0.9005  0.0006  
MAE (testset)     0.6987  0.6984  0.6998  0.6989  0.6993  0.6996  0.6995  0.6985  0.6997  0.6989  0.6991  0.0005  
PREC_5 (testset)  0.4219  0.4230  0.4180  0.4208  0.4215  0.4250  0.4216  0.4197  0.4203  0.4164  0.4208  0.0023  
REC_5 (testset)   0.0962  0.0964  0.0952  0.0972  0.0965  0.0977  0.0948  0.0945  0.0963  0.0939  0.0959  0.0011  
NDCG_5 (testset)  0.7231  0.7217  0.7199  0.7220  0.7220  0.7232  0.7218  0.7213  0.7220  0.7215  0.7219  0.0009  
PREC_10 (testset) 0.4162  0.4172  0.4123  0.4151  0.4156  0.4187  0.4158  0.4140  0.4144  0.4105  0.4150  0.0022  
REC_10 (testset)  0.1309  0.1313  0.1302  0.1320  0.1312  0.1330 

In [41]:
# User cold-start (20) section: NMF baseline with 30 latent factors;
# n_rating=20, n_exp=10, n_jobs=5.
algo = NMF(verbose=False, n_epochs=40, reg_pu=0.19, reg_qi=0.08, lr_bu=0.001, lr_bi=0.001, reg_bu=0.001, reg_bi=0.001, n_factors=30, random_state=123)

start = time.time()
parellel_cold_start(algo, data, rate, m_list, random_state=123, n_rating=20, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm NMF on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9038  0.9039  0.9053  0.9043  0.9049  0.9044  0.9055  0.9034  0.9056  0.9042  0.9045  0.0007  
MAE (testset)     0.7037  0.7034  0.7046  0.7041  0.7042  0.7040  0.7048  0.7030  0.7048  0.7037  0.7040  0.0006  
PREC_5 (testset)  0.4154  0.4171  0.4134  0.4159  0.4144  0.4193  0.4163  0.4178  0.4149  0.4129  0.4158  0.0019  
REC_5 (testset)   0.0929  0.0923  0.0920  0.0938  0.0926  0.0943  0.0914  0.0920  0.0924  0.0913  0.0925  0.0009  
NDCG_5 (testset)  0.7190  0.7179  0.7171  0.7189  0.7189  0.7191  0.7176  0.7196  0.7179  0.7179  0.7184  0.0008  
PREC_10 (testset) 0.4101  0.4116  0.4080  0.4103  0.4088  0.4141  0.4108  0.4119  0.4092  0.4077  0.4103  0.0019  
REC_10 (testset)  0.1262  0.1258  0.1254  0.1268  0.1259  0.1282 

In [42]:
# User cold-start (20) section: NMF baseline with 20 latent factors;
# n_rating=20, n_exp=10, n_jobs=5.
algo = NMF(verbose=False, n_epochs=40, reg_pu=0.19, reg_qi=0.08, lr_bu=0.001, lr_bi=0.001, reg_bu=0.001, reg_bi=0.001, n_factors=20, random_state=123)

start = time.time()
parellel_cold_start(algo, data, rate, m_list, random_state=123, n_rating=20, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm NMF on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9102  0.9103  0.9116  0.9107  0.9115  0.9116  0.9117  0.9105  0.9122  0.9108  0.9111  0.0007  
MAE (testset)     0.7113  0.7109  0.7123  0.7118  0.7119  0.7122  0.7124  0.7111  0.7127  0.7116  0.7118  0.0006  
PREC_5 (testset)  0.4057  0.4069  0.4038  0.4084  0.4074  0.4104  0.4063  0.4056  0.4037  0.4024  0.4061  0.0023  
REC_5 (testset)   0.0878  0.0878  0.0867  0.0892  0.0882  0.0890  0.0865  0.0859  0.0880  0.0860  0.0875  0.0011  
NDCG_5 (testset)  0.7151  0.7143  0.7142  0.7161  0.7140  0.7158  0.7135  0.7148  0.7145  0.7136  0.7146  0.0008  
PREC_10 (testset) 0.4008  0.4025  0.3989  0.4032  0.4024  0.4060  0.4018  0.4007  0.3985  0.3977  0.4013  0.0024  
REC_10 (testset)  0.1190  0.1191  0.1180  0.1201  0.1191  0.1208 

## Cold Start Item - 100

In [43]:
def cold_start_item_input(data, rate, n_rating=100, n_exp=10, random_state=123):
    """Yield ``n_exp`` item cold-start train/test splits.

    In every split each movie keeps at most ``n_rating`` of its ratings in the
    training set (a movie with ``n_rating`` or fewer ratings keeps all of
    them); every other rating goes to the test set.

    Args:
        data: Object exposing ``raw_ratings`` (an indexable sequence) plus the
            ``construct_trainset`` and ``construct_testset`` methods.
        rate: DataFrame with ``movieId`` and ``userId`` columns. Its index
            labels are used as positions into ``data.raw_ratings``, so it must
            carry the default 0..N-1 index in the same row order.
        n_rating: Maximum number of ratings per movie kept for training.
        n_exp: Number of splits to generate.
        random_state: Seed used to draw one sampling seed per split.

    Yields:
        ``(trainset, testset)`` as built by ``data.construct_trainset`` and
        ``data.construct_testset``.
    """
    # A private RandomState yields the same seed sequence as
    # np.random.seed() + np.random.randint() but leaves the global NumPy RNG
    # untouched for the rest of the notebook.
    rng = np.random.RandomState(random_state)
    all_positions = np.arange(len(data.raw_ratings))
    users_by_movie = rate.groupby('movieId')['userId']

    for seed in rng.randint(0, 1000, n_exp):
        # Collect the row labels group by group instead of going through
        # groupby.apply(...).reset_index(): the 'level_1' column only exists
        # when pandas prepends the group key, which pandas < 2.0 skips if every
        # group is returned unchanged.
        kept = [
            (users.sample(n_rating, random_state=seed)
             if len(users) > n_rating else users).index.to_numpy()
            for _, users in users_by_movie
        ]
        train_positions = (np.concatenate(kept) if kept
                           else np.array([], dtype=int))
        test_positions = np.delete(all_positions, train_positions)

        raw_trainset = [data.raw_ratings[pos] for pos in train_positions]
        raw_testset = [data.raw_ratings[pos] for pos in test_positions]

        trainset = data.construct_trainset(raw_trainset)
        testset = data.construct_testset(raw_testset)

        yield trainset, testset

def parellel_cold_start_item(algo, data, raw_ratings, measures=['rmse', 'mae'], random_state=123, n_rating=100, n_exp=10, n_jobs=1, pre_dispatch='2*n_jobs', return_train_measures=False, verbose=True):
    """Fit and score ``algo`` on the item cold-start splits in parallel.

    One ``fit_and_score`` job is scheduled per split produced by
    ``cold_start_item_input(data, raw_ratings, n_rating, n_exp, random_state)``.

    Args:
        algo: Algorithm instance handed to ``fit_and_score``.
        data: Dataset object forwarded to ``cold_start_item_input``.
        raw_ratings: Ratings DataFrame forwarded to ``cold_start_item_input``.
        measures: Measure names; they are lower-cased before use.
        random_state: Seed forwarded to ``cold_start_item_input``.
        n_rating: Per-movie training cap forwarded to
            ``cold_start_item_input``.
        n_exp: Number of splits to evaluate.
        n_jobs: Number of joblib workers.
        pre_dispatch: joblib ``pre_dispatch`` setting.
        return_train_measures: Also collect the measures on the trainset.
        verbose: Print the per-split summary table via ``print_summary``.

    Returns:
        dict with ``'test_<measure>'`` arrays (and ``'train_<measure>'``
        arrays when ``return_train_measures`` is true), plus ``'fit_time'``
        and ``'test_time'`` tuples, one entry per split.
    """
    measures = [m.lower() for m in measures]

    delayed_list = (delayed(fit_and_score)(algo, trainset, testset, measures,
                                           return_train_measures)
                    for (trainset, testset) in cold_start_item_input(data, raw_ratings, n_rating, n_exp, random_state))

    out = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch)(delayed_list)

    # Each job yields a 4-tuple; transpose the list of tuples into four
    # per-split sequences.
    (test_measures_dicts,
     train_measures_dicts,
     fit_times,
     test_times) = zip(*out)

    test_measures = dict()
    train_measures = dict()
    ret = dict()
    for m in measures:
        test_measures[m] = np.asarray([d[m] for d in test_measures_dicts])
        ret['test_' + m] = test_measures[m]
        if return_train_measures:
            train_measures[m] = np.asarray([d[m] for d in
                                            train_measures_dicts])
            ret['train_' + m] = train_measures[m]

    ret['fit_time'] = fit_times
    ret['test_time'] = test_times

    if verbose:
        print_summary(algo, measures, test_measures, train_measures, fit_times,
                      test_times, n_exp)

    # The collected results were previously built and then discarded; hand
    # them back so callers can use the numbers instead of parsing the printout.
    return ret

### CoSVD

In [44]:
# Item cold-start: CoSVDv9 with 40 latent factors and the tags.csv table
# (raw_tags); each movie keeps at most n_rating=100 ratings for training,
# repeated for n_exp=10 splits with n_jobs=5.
algo = CoSVDv9(verbose=False, n_epochs=65, lr_all=0.0028, n_factors=40, tags=raw_tags, random_state=123)
start = time.time()
parellel_cold_start_item(algo, data, rate, m_list, random_state=123, n_rating=100, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm CoSVDv9 on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9163  0.9166  0.9160  0.9160  0.9156  0.9163  0.9159  0.9160  0.9161  0.9166  0.9161  0.0003  
MAE (testset)     0.7108  0.7112  0.7107  0.7106  0.7103  0.7111  0.7104  0.7110  0.7107  0.7113  0.7108  0.0003  
PREC_5 (testset)  0.6595  0.6636  0.6640  0.6610  0.6613  0.6702  0.6718  0.6700  0.6612  0.6745  0.6657  0.0051  
REC_5 (testset)   0.0952  0.0942  0.0941  0.0944  0.0934  0.0948  0.0955  0.0952  0.0942  0.0953  0.0946  0.0006  
NDCG_5 (testset)  0.6825  0.6837  0.6870  0.6841  0.6851  0.6912  0.6895  0.6888  0.6846  0.6878  0.6864  0.0027  
PREC_10 (testset) 0.6493  0.6522  0.6529  0.6505  0.6509  0.6579  0.6600  0.6589  0.6504  0.6622  0.6545  0.0045  
REC_10 (testset)  0.1656  0.1632  0.1637  0.1644  0.1618  0.1

In [45]:
# Item cold-start: CoSVDv9 with 30 latent factors and the tags.csv table
# (raw_tags); each movie keeps at most n_rating=100 ratings for training,
# repeated for n_exp=10 splits with n_jobs=5.
algo = CoSVDv9(verbose=False, n_epochs=65, lr_all=0.0028, n_factors=30, tags=raw_tags, random_state=123)
start = time.time()
parellel_cold_start_item(algo, data, rate, m_list, random_state=123, n_rating=100, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm CoSVDv9 on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9147  0.9151  0.9147  0.9147  0.9143  0.9151  0.9144  0.9150  0.9145  0.9148  0.9147  0.0003  
MAE (testset)     0.7093  0.7098  0.7094  0.7094  0.7090  0.7099  0.7091  0.7101  0.7092  0.7097  0.7095  0.0004  
PREC_5 (testset)  0.6599  0.6636  0.6639  0.6612  0.6617  0.6702  0.6704  0.6688  0.6609  0.6746  0.6655  0.0048  
REC_5 (testset)   0.0952  0.0945  0.0945  0.0943  0.0936  0.0954  0.0948  0.0951  0.0945  0.0953  0.0947  0.0005  
NDCG_5 (testset)  0.6847  0.6851  0.6892  0.6866  0.6871  0.6928  0.6900  0.6895  0.6861  0.6900  0.6881  0.0024  
PREC_10 (testset) 0.6497  0.6522  0.6528  0.6507  0.6512  0.6581  0.6593  0.6574  0.6504  0.6619  0.6544  0.0042  
REC_10 (testset)  0.1661  0.1636  0.1644  0.1645  0.1622  0.1

In [46]:
# Item cold-start: CoSVDv9 with 20 latent factors and the tags.csv table
# (raw_tags); each movie keeps at most n_rating=100 ratings for training,
# repeated for n_exp=10 splits with n_jobs=5.
algo = CoSVDv9(verbose=False, n_epochs=65, lr_all=0.0028, n_factors=20, tags=raw_tags, random_state=123)
start = time.time()
parellel_cold_start_item(algo, data, rate, m_list, random_state=123, n_rating=100, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm CoSVDv9 on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9130  0.9133  0.9126  0.9126  0.9127  0.9128  0.9127  0.9128  0.9127  0.9130  0.9128  0.0002  
MAE (testset)     0.7076  0.7083  0.7076  0.7076  0.7076  0.7079  0.7075  0.7081  0.7076  0.7081  0.7078  0.0003  
PREC_5 (testset)  0.6585  0.6657  0.6638  0.6608  0.6602  0.6702  0.6723  0.6697  0.6591  0.6750  0.6655  0.0056  
REC_5 (testset)   0.0952  0.0950  0.0944  0.0944  0.0935  0.0952  0.0955  0.0951  0.0942  0.0952  0.0948  0.0006  
NDCG_5 (testset)  0.6875  0.6870  0.6914  0.6876  0.6891  0.6959  0.6927  0.6924  0.6870  0.6921  0.6903  0.0029  
PREC_10 (testset) 0.6480  0.6544  0.6528  0.6504  0.6496  0.6579  0.6601  0.6581  0.6489  0.6627  0.6543  0.0049  
REC_10 (testset)  0.1658  0.1643  0.1641  0.1645  0.1623  0.1

### SVD

In [47]:
# Item cold-start: SVD baseline with 40 latent factors; each movie keeps at
# most n_rating=100 ratings for training, n_exp=10 splits, n_jobs=5.
algo = SVD(verbose=False, n_epochs=60, lr_all=0.008, reg_all=0.091, n_factors=40, random_state=123)
start = time.time()
parellel_cold_start_item(algo, data, rate, m_list, random_state=123, n_rating=100, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9223  0.9227  0.9214  0.9216  0.9218  0.9224  0.9219  0.9221  0.9224  0.9227  0.9221  0.0004  
MAE (testset)     0.7157  0.7164  0.7153  0.7154  0.7156  0.7162  0.7157  0.7160  0.7160  0.7164  0.7159  0.0004  
PREC_5 (testset)  0.6445  0.6404  0.6464  0.6447  0.6442  0.6492  0.6489  0.6522  0.6399  0.6505  0.6461  0.0039  
REC_5 (testset)   0.0891  0.0865  0.0888  0.0885  0.0873  0.0876  0.0870  0.0883  0.0860  0.0877  0.0877  0.0009  
NDCG_5 (testset)  0.6836  0.6812  0.6850  0.6819  0.6827  0.6884  0.6859  0.6870  0.6804  0.6846  0.6841  0.0025  
PREC_10 (testset) 0.6352  0.6307  0.6360  0.6348  0.6349  0.6383  0.6385  0.6416  0.6302  0.6395  0.6360  0.0035  
REC_10 (testset)  0.1549  0.1508  0.1540  0.1541  0.1516  0.1524 

In [48]:
# Item cold-start: SVD baseline with 30 latent factors; each movie keeps at
# most n_rating=100 ratings for training, n_exp=10 splits, n_jobs=5.
algo = SVD(verbose=False, n_epochs=60, lr_all=0.008, reg_all=0.091, n_factors=30, random_state=123)
start = time.time()
parellel_cold_start_item(algo, data, rate, m_list, random_state=123, n_rating=100, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9229  0.9242  0.9231  0.9234  0.9230  0.9242  0.9231  0.9242  0.9232  0.9239  0.9235  0.0005  
MAE (testset)     0.7157  0.7171  0.7160  0.7163  0.7160  0.7171  0.7162  0.7173  0.7162  0.7168  0.7165  0.0005  
PREC_5 (testset)  0.6462  0.6410  0.6483  0.6452  0.6445  0.6510  0.6486  0.6503  0.6413  0.6516  0.6468  0.0036  
REC_5 (testset)   0.0896  0.0869  0.0893  0.0888  0.0865  0.0880  0.0874  0.0882  0.0872  0.0882  0.0880  0.0010  
NDCG_5 (testset)  0.6841  0.6807  0.6855  0.6813  0.6828  0.6884  0.6849  0.6850  0.6799  0.6855  0.6838  0.0025  
PREC_10 (testset) 0.6364  0.6315  0.6379  0.6352  0.6352  0.6401  0.6381  0.6401  0.6320  0.6405  0.6367  0.0031  
REC_10 (testset)  0.1558  0.1515  0.1548  0.1549  0.1508  0.1533 

In [49]:
# Item cold-start: SVD baseline with 20 latent factors; each movie keeps at
# most n_rating=100 ratings for training, n_exp=10 splits, n_jobs=5.
algo = SVD(verbose=False, n_epochs=60, lr_all=0.008, reg_all=0.091, n_factors=20, random_state=123)
start = time.time()
parellel_cold_start_item(algo, data, rate, m_list, random_state=123, n_rating=100, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVD on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9241  0.9256  0.9236  0.9241  0.9249  0.9244  0.9245  0.9245  0.9242  0.9250  0.9245  0.0005  
MAE (testset)     0.7161  0.7177  0.7160  0.7163  0.7171  0.7168  0.7168  0.7169  0.7164  0.7172  0.7167  0.0005  
PREC_5 (testset)  0.6452  0.6414  0.6469  0.6437  0.6432  0.6493  0.6497  0.6502  0.6400  0.6498  0.6459  0.0036  
REC_5 (testset)   0.0895  0.0869  0.0894  0.0888  0.0874  0.0879  0.0875  0.0884  0.0875  0.0881  0.0881  0.0008  
NDCG_5 (testset)  0.6847  0.6813  0.6860  0.6809  0.6834  0.6887  0.6856  0.6866  0.6814  0.6856  0.6844  0.0025  
PREC_10 (testset) 0.6353  0.6316  0.6369  0.6344  0.6344  0.6387  0.6393  0.6397  0.6311  0.6389  0.6360  0.0030  
REC_10 (testset)  0.1557  0.1515  0.1551  0.1549  0.1523  0.1531 

### SVD++

In [50]:
# Item cold-start: SVD++ baseline with 40 latent factors; each movie keeps at
# most n_rating=100 ratings for training, n_exp=10 splits, n_jobs=5.
algo = SVDpp(verbose=False, n_epochs=45, lr_all=0.0012, reg_all=0.0012, n_factors=40, random_state=123)

start = time.time()
parellel_cold_start_item(algo, data, rate, m_list, random_state=123, n_rating=100, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVDpp on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9086  0.9096  0.9089  0.9089  0.9092  0.9092  0.9087  0.9087  0.9089  0.9096  0.9090  0.0003  
MAE (testset)     0.7086  0.7098  0.7092  0.7091  0.7094  0.7095  0.7088  0.7094  0.7094  0.7096  0.7093  0.0003  
PREC_5 (testset)  0.6487  0.6474  0.6509  0.6515  0.6534  0.6590  0.6589  0.6583  0.6445  0.6525  0.6525  0.0048  
REC_5 (testset)   0.0883  0.0869  0.0886  0.0896  0.0877  0.0890  0.0880  0.0884  0.0865  0.0873  0.0880  0.0009  
NDCG_5 (testset)  0.6824  0.6814  0.6851  0.6801  0.6838  0.6870  0.6879  0.6870  0.6796  0.6844  0.6839  0.0028  
PREC_10 (testset) 0.6397  0.6380  0.6408  0.6428  0.6436  0.6487  0.6492  0.6478  0.6364  0.6421  0.6429  0.0042  
REC_10 (testset)  0.1540  0.1511  0.1536  0.1553  0.1520  0.154

In [51]:
# Item cold-start: SVD++ baseline with 30 latent factors; each movie keeps at
# most n_rating=100 ratings for training, n_exp=10 splits, n_jobs=5.
algo = SVDpp(verbose=False, n_epochs=45, lr_all=0.0012, reg_all=0.0012, n_factors=30, random_state=123)

start = time.time()
parellel_cold_start_item(algo, data, rate, m_list, random_state=123, n_rating=100, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVDpp on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9062  0.9072  0.9065  0.9070  0.9068  0.9070  0.9068  0.9074  0.9069  0.9075  0.9070  0.0004  
MAE (testset)     0.7062  0.7071  0.7067  0.7066  0.7069  0.7071  0.7065  0.7076  0.7071  0.7074  0.7069  0.0004  
PREC_5 (testset)  0.6557  0.6556  0.6567  0.6516  0.6562  0.6619  0.6603  0.6621  0.6501  0.6639  0.6574  0.0043  
REC_5 (testset)   0.0901  0.0907  0.0910  0.0903  0.0888  0.0894  0.0888  0.0898  0.0880  0.0897  0.0896  0.0009  
NDCG_5 (testset)  0.6873  0.6857  0.6875  0.6815  0.6876  0.6900  0.6890  0.6908  0.6817  0.6922  0.6873  0.0034  
PREC_10 (testset) 0.6455  0.6450  0.6472  0.6429  0.6466  0.6512  0.6499  0.6515  0.6408  0.6524  0.6473  0.0037  
REC_10 (testset)  0.1566  0.1563  0.1577  0.1570  0.1540  0.155

In [52]:
# Item cold-start: SVD++ baseline with 20 latent factors; each movie keeps at
# most n_rating=100 ratings for training, n_exp=10 splits, n_jobs=5.
algo = SVDpp(verbose=False, n_epochs=45, lr_all=0.0012, reg_all=0.0012, n_factors=20, random_state=123)

start = time.time()
parellel_cold_start_item(algo, data, rate, m_list, random_state=123, n_rating=100, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm SVDpp on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9057  0.9057  0.9056  0.9057  0.9057  0.9056  0.9051  0.9054  0.9053  0.9053  0.9055  0.0002  
MAE (testset)     0.7050  0.7050  0.7052  0.7049  0.7052  0.7052  0.7043  0.7052  0.7048  0.7048  0.7050  0.0003  
PREC_5 (testset)  0.6614  0.6633  0.6602  0.6601  0.6572  0.6712  0.6650  0.6671  0.6544  0.6661  0.6626  0.0047  
REC_5 (testset)   0.0934  0.0927  0.0924  0.0917  0.0912  0.0937  0.0916  0.0920  0.0908  0.0913  0.0921  0.0009  
NDCG_5 (testset)  0.6924  0.6890  0.6903  0.6875  0.6880  0.6967  0.6931  0.6927  0.6861  0.6943  0.6910  0.0032  
PREC_10 (testset) 0.6505  0.6520  0.6498  0.6504  0.6469  0.6593  0.6538  0.6559  0.6452  0.6545  0.6518  0.0040  
REC_10 (testset)  0.1618  0.1597  0.1606  0.1593  0.1580  0.161

### NMF

In [53]:
# Item cold-start: NMF baseline with 40 latent factors; each movie keeps at
# most n_rating=100 ratings for training, n_exp=10 splits, n_jobs=5.
algo = NMF(verbose=False, n_epochs=40, reg_pu=0.19, reg_qi=0.08, lr_bu=0.001, lr_bi=0.001, reg_bu=0.001, reg_bi=0.001, n_factors=40, random_state=123)

start = time.time()
parellel_cold_start_item(algo, data, rate, m_list, random_state=123, n_rating=100, n_exp=10, n_jobs=5)
# Elapsed wall-clock time of the whole evaluation, in seconds.
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm NMF on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    0.9971  0.9974  0.9956  0.9970  0.9957  0.9973  0.9960  0.9970  0.9961  0.9978  0.9967  0.0007  
MAE (testset)     0.7730  0.7734  0.7720  0.7731  0.7723  0.7733  0.7722  0.7731  0.7722  0.7738  0.7728  0.0006  
PREC_5 (testset)  0.4664  0.4653  0.4721  0.4697  0.4685  0.4771  0.4716  0.4713  0.4659  0.4697  0.4698  0.0034  
REC_5 (testset)   0.0498  0.0492  0.0501  0.0499  0.0493  0.0502  0.0497  0.0495  0.0491  0.0494  0.0496  0.0004  
NDCG_5 (testset)  0.6299  0.6266  0.6333  0.6307  0.6295  0.6375  0.6348  0.6318  0.6293  0.6322  0.6316  0.0029  
PREC_10 (testset) 0.4631  0.4619  0.4676  0.4655  0.4652  0.4724  0.4673  0.4670  0.4625  0.4650  0.4657  0.0029  
REC_10 (testset)  0.0910  0.0900  0.0909  0.0905  0.0899  0.0909 

In [54]:
# NMF configured with 30 latent factors and a fixed seed so the run is repeatable.
# NOTE(review): in upstream Surprise the lr_bu/lr_bi/reg_bu/reg_bi settings are
# only relevant when biased=True, which is not passed here - confirm whether
# they have any effect in this build of the library.
algo = NMF(
    n_factors=30,
    n_epochs=40,
    reg_pu=0.19,
    reg_qi=0.08,
    lr_bu=0.001,
    lr_bi=0.001,
    reg_bu=0.001,
    reg_bi=0.001,
    random_state=123,
    verbose=False
)

# Time the whole item cold-start evaluation; the elapsed seconds are printed
# once the call returns.
start = time.time()
parellel_cold_start_item(
    algo,
    data,
    rate,
    m_list,
    random_state=123,
    n_rating=100,
    n_exp=10,
    n_jobs=5
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm NMF on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    1.0012  1.0015  1.0001  1.0017  1.0005  1.0019  1.0004  1.0015  1.0005  1.0020  1.0011  0.0007  
MAE (testset)     0.7772  0.7776  0.7767  0.7778  0.7771  0.7780  0.7766  0.7777  0.7768  0.7780  0.7773  0.0005  
PREC_5 (testset)  0.4683  0.4700  0.4711  0.4720  0.4719  0.4770  0.4702  0.4742  0.4656  0.4706  0.4711  0.0029  
REC_5 (testset)   0.0498  0.0495  0.0499  0.0498  0.0498  0.0501  0.0498  0.0494  0.0488  0.0494  0.0496  0.0003  
NDCG_5 (testset)  0.6278  0.6261  0.6289  0.6282  0.6297  0.6337  0.6307  0.6306  0.6254  0.6287  0.6290  0.0023  
PREC_10 (testset) 0.4645  0.4663  0.4666  0.4672  0.4673  0.4723  0.4663  0.4700  0.4625  0.4662  0.4669  0.0026  
REC_10 (testset)  0.0908  0.0901  0.0909  0.0901  0.0902  0.0907 

In [55]:
# NMF configured with 20 latent factors and a fixed seed so the run is repeatable.
# NOTE(review): in upstream Surprise the lr_bu/lr_bi/reg_bu/reg_bi settings are
# only relevant when biased=True, which is not passed here - confirm whether
# they have any effect in this build of the library.
algo = NMF(
    n_factors=20,
    n_epochs=40,
    reg_pu=0.19,
    reg_qi=0.08,
    lr_bu=0.001,
    lr_bi=0.001,
    reg_bu=0.001,
    reg_bi=0.001,
    random_state=123,
    verbose=False
)

# Time the whole item cold-start evaluation; the elapsed seconds are printed
# once the call returns.
start = time.time()
parellel_cold_start_item(
    algo,
    data,
    rate,
    m_list,
    random_state=123,
    n_rating=100,
    n_exp=10,
    n_jobs=5
)
print(time.time() - start)

Evaluating RMSE, MAE, PREC_5, REC_5, NDCG_5, PREC_10, REC_10, NDCG_10, PREC_15, REC_15, NDCG_15 of algorithm NMF on 10 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Mean    Std     
RMSE (testset)    1.0091  1.0096  1.0070  1.0084  1.0081  1.0087  1.0080  1.0092  1.0080  1.0093  1.0085  0.0008  
MAE (testset)     0.7848  0.7857  0.7835  0.7845  0.7847  0.7849  0.7844  0.7852  0.7843  0.7854  0.7847  0.0006  
PREC_5 (testset)  0.4714  0.4714  0.4738  0.4737  0.4726  0.4756  0.4743  0.4769  0.4690  0.4717  0.4730  0.0022  
REC_5 (testset)   0.0502  0.0493  0.0504  0.0501  0.0496  0.0503  0.0500  0.0499  0.0493  0.0496  0.0499  0.0004  
NDCG_5 (testset)  0.6240  0.6225  0.6268  0.6240  0.6236  0.6293  0.6270  0.6261  0.6231  0.6232  0.6250  0.0021  
PREC_10 (testset) 0.4672  0.4670  0.4695  0.4693  0.4683  0.4708  0.4697  0.4718  0.4648  0.4677  0.4686  0.0019  
REC_10 (testset)  0.0910  0.0895  0.0912  0.0905  0.0898  0.0906 