In [1]:
import pandas as pd
import pickle
from surprise import prediction_algorithms as pa
from surprise import Dataset, Reader
from surprise import evaluate, print_perf

In [2]:
data = pd.read_csv('./movielens_small/ratings.csv')
number_of_rows = len(data)


In [4]:
res_tune = {}
for latent_factors in [5,10,15,20,25,30]:
    res_tune[latent_factors] = {}
    for reg_t in [1,0.5,0.2,0.1,0.05,0.02,0.01]:
        reader = Reader(rating_scale=(0.5, 5))
        train_data = Dataset.load_from_df(data[['userId','movieId','rating']], reader)

        algo = pa.matrix_factorization.NMF(n_factors=latent_factors, n_epochs=100,reg_pu=reg_t, reg_qi=reg_t)

        train_data.split(n_folds=5)

        perf = evaluate(algo, train_data, measures=['RMSE', 'MAE','FCP'])
        
        res_tune[latent_factors][reg_t] = perf

Evaluating RMSE, MAE, FCP of algorithm NMF.

------------
Fold 1
RMSE: 1.3275
MAE:  1.1488
FCP:  0.6297
------------
Fold 2
RMSE: 1.3322
MAE:  1.1530
FCP:  0.6238
------------
Fold 3
RMSE: 1.3323
MAE:  1.1549
FCP:  0.6279
------------
Fold 4
RMSE: 1.3303
MAE:  1.1522
FCP:  0.6257
------------
Fold 5
RMSE: 1.3378
MAE:  1.1582
FCP:  0.6166
------------
------------
Mean RMSE: 1.3320
Mean MAE : 1.1534
Mean FCP : 0.6247
------------
------------
Evaluating RMSE, MAE, FCP of algorithm NMF.

------------
Fold 1
RMSE: 1.0312
MAE:  0.8447
FCP:  0.6349
------------
Fold 2
RMSE: 1.0246
MAE:  0.8396
FCP:  0.6349
------------
Fold 3
RMSE: 1.0307
MAE:  0.8439
FCP:  0.6410
------------
Fold 4
RMSE: 1.0342
MAE:  0.8467
FCP:  0.6246
------------
Fold 5
RMSE: 1.0253
MAE:  0.8391
FCP:  0.6314
------------
------------
Mean RMSE: 1.0292
Mean MAE : 0.8428
Mean FCP : 0.6333
------------
------------
Evaluating RMSE, MAE, FCP of algorithm NMF.

------------
Fold 1
RMSE: 0.9325
MAE:  0.7299
FCP:  0.6402
----

RMSE: 0.9545
MAE:  0.7338
FCP:  0.6349
------------
Fold 4
RMSE: 0.9702
MAE:  0.7448
FCP:  0.6382
------------
Fold 5
RMSE: 0.9588
MAE:  0.7387
FCP:  0.6399
------------
------------
Mean RMSE: 0.9628
Mean MAE : 0.7399
Mean FCP : 0.6373
------------
------------
Evaluating RMSE, MAE, FCP of algorithm NMF.

------------
Fold 1
RMSE: 1.0084
MAE:  0.7685
FCP:  0.6282
------------
Fold 2
RMSE: 1.0046
MAE:  0.7652
FCP:  0.6254
------------
Fold 3
RMSE: 0.9999
MAE:  0.7615
FCP:  0.6212
------------
Fold 4
RMSE: 1.0076
MAE:  0.7679
FCP:  0.6261
------------
Fold 5
RMSE: 1.0108
MAE:  0.7716
FCP:  0.6168
------------
------------
Mean RMSE: 1.0062
Mean MAE : 0.7669
Mean FCP : 0.6235
------------
------------
Evaluating RMSE, MAE, FCP of algorithm NMF.

------------
Fold 1
RMSE: 1.0378
MAE:  0.7853
FCP:  0.6138
------------
Fold 2
RMSE: 1.0366
MAE:  0.7846
FCP:  0.6231
------------
Fold 3
RMSE: 1.0407
MAE:  0.7899
FCP:  0.6249
------------
Fold 4
RMSE: 1.0315
MAE:  0.7821
FCP:  0.6184
----------

------------
Fold 1
RMSE: 0.9246
MAE:  0.7268
FCP:  0.6521
------------
Fold 2
RMSE: 0.9255
MAE:  0.7280
FCP:  0.6551
------------
Fold 3
RMSE: 0.9216
MAE:  0.7239
FCP:  0.6502
------------
Fold 4
RMSE: 0.9318
MAE:  0.7314
FCP:  0.6464
------------
Fold 5
RMSE: 0.9263
MAE:  0.7276
FCP:  0.6567
------------
------------
Mean RMSE: 0.9260
Mean MAE : 0.7275
Mean FCP : 0.6521
------------
------------
Evaluating RMSE, MAE, FCP of algorithm NMF.

------------
Fold 1
RMSE: 0.9323
MAE:  0.7216
FCP:  0.6521
------------
Fold 2
RMSE: 0.9235
MAE:  0.7154
FCP:  0.6440
------------
Fold 3
RMSE: 0.9343
MAE:  0.7275
FCP:  0.6452
------------
Fold 4
RMSE: 0.9118
MAE:  0.7072
FCP:  0.6572
------------
Fold 5
RMSE: 0.9332
MAE:  0.7233
FCP:  0.6537
------------
------------
Mean RMSE: 0.9270
Mean MAE : 0.7190
Mean FCP : 0.6505
------------
------------
Evaluating RMSE, MAE, FCP of algorithm NMF.

------------
Fold 1
RMSE: 0.9529
MAE:  0.7306
FCP:  0.6383
------------
Fold 2
RMSE: 0.9468
MAE:  0.7257
FCP

In [5]:
for l in res_tune:
    s = ""
    for j in res_tune[l]:
        s+=str(l)+str(j)+" "+str(sum(res_tune[l][j]['rmse'])/len(res_tune[l][j]['rmse']))+"     "
    s+='\n'
    print s
import pickle
pickle.dump(res_tune,open('nmf_factors_regularization.dat','wb'))

51 1.33200820906     50.5 1.02919977844     50.2 0.931741762808     50.1 0.941648985831     50.05 0.97263237351     50.02 1.20465432804     50.01 1.58775929183     

101 1.33254634787     100.5 1.0302819805     100.2 0.927275651921     100.1 0.934868793986     100.05 0.969482013552     100.02 1.02675488407     100.01 1.09911773249     

151 1.33146487513     150.5 1.03131589924     150.2 0.928855731436     150.1 0.931815260322     150.05 0.962758070936     150.02 1.00624571112     150.01 1.03694173559     

201 1.33107402482     200.5 1.03048548134     200.2 0.928117897781     200.1 0.929681800429     200.05 0.960596851294     200.02 1.03352265144     200.01 1.1497832216     

251 1.33174998614     250.5 1.02903102545     250.2 0.926622734451     250.1 0.927716530493     250.05 0.958286271921     250.02 1.06373504069     250.01 1.26143860047     

301 1.33199665601     300.5 1.029319779     300.2 0.925974013702     300.1 0.927025957194     300.05 0.952331989689     300.02 1.08856369898

In [7]:
res_reg_bias_tune = {}
for learn_bias in [0.02,0.01,0.005,0.002,0.001]:
    res_reg_bias_tune[learn_bias] = {}
    for reg in [0.1,0.05,0.02,0.01]:
        reader = Reader(rating_scale=(0.5, 5))
        train_data = Dataset.load_from_df(data[['userId','movieId','rating']], reader)

        algo = pa.matrix_factorization.NMF(n_factors=15, n_epochs=200,reg_pu=reg, reg_qi=reg, biased = True, reg_bu=reg, reg_bi=reg, lr_bu=learn_bias, lr_bi=learn_bias )

        train_data.split(n_folds=5)

        perf = evaluate(algo, train_data, measures=['RMSE', 'MAE','FCP'])
        
        res_reg_bias_tune[learn_bias][reg] = perf
pickle.dump(res_reg_bias_tune,open('nmf_bias_reg.dat','wb'))

Evaluating RMSE, MAE, FCP of algorithm NMF.

------------
Fold 1
RMSE: 1.0593
MAE:  0.7988
FCP:  0.6213
------------
Fold 2
RMSE: 0.9454
MAE:  0.7174
FCP:  0.6370
------------
Fold 3
RMSE: 0.9583
MAE:  0.7273
FCP:  0.6463
------------
Fold 4
RMSE: 1.0935
MAE:  0.8186
FCP:  0.6386
------------
Fold 5
RMSE: 0.8978
MAE:  0.6912
FCP:  0.6606
------------
------------
Mean RMSE: 0.9909
Mean MAE : 0.7507
Mean FCP : 0.6408
------------
------------
Evaluating RMSE, MAE, FCP of algorithm NMF.

------------
Fold 1
RMSE: 1.0169
MAE:  0.7670
FCP:  0.6325
------------
Fold 2
RMSE: 0.9157
MAE:  0.7030
FCP:  0.6532
------------
Fold 3
RMSE: 0.9105
MAE:  0.7006
FCP:  0.6585
------------
Fold 4
RMSE: 1.0993
MAE:  0.8291
FCP:  0.6151
------------
Fold 5
RMSE: 1.4048
MAE:  1.0282
FCP:  0.5778
------------
------------
Mean RMSE: 1.0694
Mean MAE : 0.8056
Mean FCP : 0.6274
------------
------------
Evaluating RMSE, MAE, FCP of algorithm NMF.

------------
Fold 1
RMSE: 1.0533
MAE:  0.7869
FCP:  0.6406
----

RMSE: 0.9669
MAE:  0.7378
FCP:  0.6327
------------
Fold 4
RMSE: 0.9708
MAE:  0.7457
FCP:  0.6372
------------
Fold 5
RMSE: 0.9625
MAE:  0.7374
FCP:  0.6358
------------
------------
Mean RMSE: 0.9667
Mean MAE : 0.7400
Mean FCP : 0.6377
------------
------------
Evaluating RMSE, MAE, FCP of algorithm NMF.

------------
Fold 1
RMSE: 1.0140
MAE:  0.7715
FCP:  0.6340
------------
Fold 2
RMSE: 1.0147
MAE:  0.7746
FCP:  0.6324
------------
Fold 3
RMSE: 1.0147
MAE:  0.7714
FCP:  0.6334
------------
Fold 4
RMSE: 1.0177
MAE:  0.7757
FCP:  0.6329
------------
Fold 5
RMSE: 1.0236
MAE:  0.7792
FCP:  0.6299
------------
------------
Mean RMSE: 1.0170
Mean MAE : 0.7745
Mean FCP : 0.6325
------------
------------


In [8]:
for l in res_reg_bias_tune:
    s = ""
    for j in res_reg_bias_tune[l]:
        s+=str(l)+str(j)+" "+str(sum(res_reg_bias_tune[l][j]['rmse'])/len(res_reg_bias_tune[l][j]['rmse']))+"     "
    s+='\n'
    print s

0.020.1 0.990867703499     0.020.05 1.06943009138     0.020.02 1.09409184251     0.020.01 1.11279994163     

0.010.1 0.905684618822     0.010.05 0.952751460819     0.010.02 1.06519872051     0.010.01 1.29309673173     

0.0050.1 0.968266527679     0.0050.05 0.938715849323     0.0050.02 1.17934461603     0.0050.01 1.20542166817     

0.0020.1 0.902203848564     0.0020.05 1.02624165919     0.0020.02 0.970819904696     0.0020.01 1.29570986636     

0.0010.1 0.879954714485     0.0010.05 0.899100975189     0.0010.02 0.966737160187     0.0010.01 1.01695999776     



In [9]:
svd_fact = {}
for latent_factors in [10,20,30,40,50,60,70,80,90,100]:
    svd_fact[latent_factors] = {}
    reader = Reader(rating_scale=(0.5, 5))
    train_data = Dataset.load_from_df(data[['userId','movieId','rating']], reader)

    algo = pa.matrix_factorization.SVD(n_factors=latent_factors, n_epochs=100)

    train_data.split(n_folds=5)

    perf = evaluate(algo, train_data, measures=['RMSE', 'MAAE','FCP'])

    svd_fact[latent_factors] = perf

Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.9407
MAE:  0.7176
FCP:  0.6389
------------
Fold 2
RMSE: 0.9352
MAE:  0.7139
FCP:  0.6409
------------
Fold 3
RMSE: 0.9361
MAE:  0.7147
FCP:  0.6404
------------
Fold 4
RMSE: 0.9351
MAE:  0.7155
FCP:  0.6417
------------
Fold 5
RMSE: 0.9451
MAE:  0.7208
FCP:  0.6310
------------
------------
Mean RMSE: 0.9384
Mean MAE : 0.7165
Mean FCP : 0.6386
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.9434
MAE:  0.7226
FCP:  0.6406
------------
Fold 2
RMSE: 0.9499
MAE:  0.7261
FCP:  0.6317
------------
Fold 3
RMSE: 0.9466
MAE:  0.7249
FCP:  0.6235
------------
Fold 4
RMSE: 0.9468
MAE:  0.7278
FCP:  0.6368
------------
Fold 5
RMSE: 0.9481
MAE:  0.7257
FCP:  0.6338
------------
------------
Mean RMSE: 0.9470
Mean MAE : 0.7254
Mean FCP : 0.6333
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.9456
MAE:  0.7235
FCP:  0.6312
----

In [11]:
for l in svd_fact:
    s = ""
    s+=str(l)+" "+str(sum(svd_fact[l]['rmse'])/len(svd_fact[l]['rmse']))+"     "
    s+='\n'
    print s
pickle.dump(svd_fact,open('svd_factors.dat','wb'))

10 0.938422022494     

20 0.946963114637     

30 0.948099233241     

40 0.938776832261     

50 0.932368939007     

60 0.924484407046     

70 0.922479727492     

80 0.91786079312     

90 0.910934926967     

100 0.909183202499     



In [13]:
svd_fact = {}
for latent_factors in [110,120,130,140,150]:
    svd_fact[latent_factors] = {}
    reader = Reader(rating_scale=(0.5, 5))
    train_data = Dataset.load_from_df(data[['userId','movieId','rating']], reader)

    algo = pa.matrix_factorization.SVD(n_factors=latent_factors, n_epochs=100)

    train_data.split(n_folds=5)

    perf = evaluate(algo, train_data, measures=['RMSE', 'MAE','FCP'])

    svd_fact[latent_factors] = perf

Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.9042
MAE:  0.6946
FCP:  0.6468
------------
Fold 2
RMSE: 0.9093
MAE:  0.6971
FCP:  0.6393
------------
Fold 3
RMSE: 0.9131
MAE:  0.7032
FCP:  0.6452
------------
Fold 4
RMSE: 0.9020
MAE:  0.6954
FCP:  0.6380
------------
Fold 5
RMSE: 0.9133
MAE:  0.7031
FCP:  0.6395
------------
------------
Mean RMSE: 0.9084
Mean MAE : 0.6987
Mean FCP : 0.6418
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.9051
MAE:  0.6964
FCP:  0.6407
------------
Fold 2
RMSE: 0.9024
MAE:  0.6944
FCP:  0.6409
------------
Fold 3
RMSE: 0.9080
MAE:  0.6972
FCP:  0.6362
------------
Fold 4
RMSE: 0.9025
MAE:  0.6961
FCP:  0.6460
------------
Fold 5
RMSE: 0.9058
MAE:  0.6990
FCP:  0.6338
------------
------------
Mean RMSE: 0.9048
Mean MAE : 0.6966
Mean FCP : 0.6395
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.8956
MAE:  0.6901
FCP:  0.6470
----

In [14]:
pickle.dump(svd_fact,open('svd_factors_2.dat','wb'))

In [15]:
svd_bias_reg = {}
for learn_bias in [0.02,0.01,0.005,0.002,0.001]:
    svd_bias_reg[learn_bias] = {}
    for reg in [0.1,0.05,0.02,0.01]:
        reader = Reader(rating_scale=(0.5, 5))
        train_data = Dataset.load_from_df(data[['userId','movieId','rating']], reader)

        algo = pa.matrix_factorization.SVD(n_factors=100, n_epochs=100,reg_all=reg, biased = True, lr_all=learn_bias )

        train_data.split(n_folds=5)

        perf = evaluate(algo, train_data, measures=['RMSE', 'MAE','FCP'])
        
        svd_bias_reg[learn_bias][reg] = perf
pickle.dump(svd_bias_reg,open('svd_bias_reg.dat','wb'))

Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.8722
MAE:  0.6687
FCP:  0.6743
------------
Fold 2
RMSE: 0.8807
MAE:  0.6791
FCP:  0.6704
------------
Fold 3
RMSE: 0.8785
MAE:  0.6737
FCP:  0.6660
------------
Fold 4
RMSE: 0.8730
MAE:  0.6685
FCP:  0.6623
------------
Fold 5
RMSE: 0.8749
MAE:  0.6717
FCP:  0.6629
------------
------------
Mean RMSE: 0.8759
Mean MAE : 0.6723
Mean FCP : 0.6672
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.8786
MAE:  0.6736
FCP:  0.6690
------------
Fold 2
RMSE: 0.8833
MAE:  0.6761
FCP:  0.6637
------------
Fold 3
RMSE: 0.8833
MAE:  0.6796
FCP:  0.6676
------------
Fold 4
RMSE: 0.8825
MAE:  0.6757
FCP:  0.6565
------------
Fold 5
RMSE: 0.8865
MAE:  0.6815
FCP:  0.6656
------------
------------
Mean RMSE: 0.8828
Mean MAE : 0.6773
Mean FCP : 0.6645
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.9004
MAE:  0.6924
FCP:  0.6553
----

RMSE: 0.8965
MAE:  0.6931
FCP:  0.6382
------------
Fold 4
RMSE: 0.8927
MAE:  0.6869
FCP:  0.6324
------------
Fold 5
RMSE: 0.8989
MAE:  0.6904
FCP:  0.6396
------------
------------
Mean RMSE: 0.8969
Mean MAE : 0.6905
Mean FCP : 0.6360
------------
------------
Evaluating RMSE, MAE, FCP of algorithm SVD.

------------
Fold 1
RMSE: 0.9058
MAE:  0.6947
FCP:  0.6247
------------
Fold 2
RMSE: 0.9120
MAE:  0.6997
FCP:  0.6234
------------
Fold 3
RMSE: 0.8956
MAE:  0.6899
FCP:  0.6321
------------
Fold 4
RMSE: 0.9007
MAE:  0.6943
FCP:  0.6284
------------
Fold 5
RMSE: 0.8968
MAE:  0.6936
FCP:  0.6295
------------
------------
Mean RMSE: 0.9022
Mean MAE : 0.6944
Mean FCP : 0.6276
------------
------------


In [16]:
for l in svd_bias_reg:
    s = ""
    for j in svd_bias_reg[l]:
        s+=str(l)+str(j)+" "+str(sum(svd_bias_reg[l][j]['rmse'])/len(svd_bias_reg[l][j]['rmse']))+"     "
    s+='\n'
    print s

0.020.1 0.87585934029     0.020.05 0.882846519288     0.020.02 0.897437232431     0.020.01 0.914870184894     

0.010.1 0.873931761755     0.010.05 0.882074373913     0.010.02 0.90288574025     0.010.01 0.923099778028     

0.0050.1 0.872742839599     0.0050.05 0.883611177662     0.0050.02 0.910407899671     0.0050.01 0.934212777712     

0.0020.1 0.88316673376     0.0020.05 0.882311826213     0.0020.02 0.903639892091     0.0020.01 0.922477143628     

0.0010.1 0.890929487654     0.0010.05 0.890416172623     0.0010.02 0.896853506675     0.0010.01 0.902190092614     

