In [3]:
import numpy as np
import pandas as pd
import os.path
from random import randint

ratings_file = './data/ratings.csv'
predictions_file = './data/predictions.csv'

ratings_description = pd.read_csv(ratings_file, delimiter=';',
                                  dtype={'userID': 'int', 'movieID': 'int', 'rating': 'int'},
                                  names=['userID', 'movieID', 'rating'])

num_movies = max(ratings_description["movieID"])
num_users = max(ratings_description["userID"])
R = np.zeros(( num_movies, num_users))

for user, movie, rating in ratings_description.values:
    R[movie-1, user-1] = rating

R[R==0] = np.nan
print(f"Shape of Utility matrix is (movies, users): {R.shape}")
R

Shape of Utility matrix is (movies, users): (3706, 6040)


array([[ 5., nan, nan, ..., nan, nan,  3.],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])

In [17]:
from tqdm import tqdm_notebook as tqdm

EPOCHS = 1500
LEARNING_RATE = 0.02 # == nu
LAMBDA = 0.1
K = 2 # number of factors to work with.

# np.random.seed(42)
Q = np.random.uniform(-1, 1, (R.shape[0], K))
P = np.random.uniform(-1, 1, (K, R.shape[1]))
regularization_learning_curve = []

div = (R.shape[0] * R.shape[1]) - np.isnan(R).sum()
RMSE = np.sqrt(((np.nan_to_num(R - np.matmul(Q, P), 0)**2).sum())/div)
print(f"Starting RMSE: {RMSE}")


for epoch in tqdm(range(EPOCHS)):
    R_pred = np.matmul(Q,P)
    curr_error = np.nan_to_num(R - R_pred, 0)
    Q_update = np.zeros(Q.shape)
    for i in range(len(Q_update)):
        for curr_k in range(K):
            Q_delta =(-2 * np.dot(P[curr_k, :], curr_error[i]))/np.isnan(R[i]).sum()
            Q_update[i, curr_k] = LEARNING_RATE * (Q_delta + LAMBDA*Q[i, curr_k])

    P_update = np.zeros(P.shape)
    for i in range(P_update.shape[1]):
        for curr_k in range(K):
            P_delta =(-2 * np.dot(Q[:, curr_k], curr_error[:, i]))/np.isnan(R[:, i]).sum()
            P_update[curr_k, i] = LEARNING_RATE * (P_delta + LAMBDA*P[curr_k, i])

    Q -= Q_update
    P -= P_update
    
    RMSE_i = np.sqrt(((np.nan_to_num(R - np.matmul(Q, P), 0)**2).sum())/div)
    print(f"RMSE {epoch}: {RMSE_i}")
    regularization_learning_curve.append([epoch, RMSE_i])


RMSE = np.sqrt(((np.nan_to_num(R - np.matmul(Q, P), 0)**2).sum())/div)
print(f"Final RMSE: {RMSE}")

Starting RMSE: 3.7818907428629025


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for epoch in tqdm(range(EPOCHS)):


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1500.0), HTML(value='')))

RMSE 0: 3.7813765411267513
RMSE 1: 3.780872828453888
RMSE 2: 3.780379317483961
RMSE 3: 3.7798957307361998
RMSE 4: 3.7794218001957396
RMSE 5: 3.778957266919991
RMSE 6: 3.7785018806637027
RMSE 7: 3.7780553995219055
RMSE 8: 3.7776175895897026
RMSE 9: 3.7771882246379787
RMSE 10: 3.7767670858042255
RMSE 11: 3.776353961297625
RMSE 12: 3.7759486461177185
RMSE 13: 3.775550941785919
RMSE 14: 3.7751606560891044
RMSE 15: 3.7747776028348654
RMSE 16: 3.774401601617604
RMSE 17: 3.7740324775950502
RMSE 18: 3.773670061274636
RMSE 19: 3.77331418830926
RMSE 20: 3.7729646993018284
RMSE 21: 3.772621439618378
RMSE 22: 3.772284259209128
RMSE 23: 3.7719530124371645
RMSE 24: 3.7716275579144356
RMSE 25: 3.771307758344562
RMSE 26: 3.7709934803722476
RMSE 27: 3.7706845944389418
RMSE 28: 3.7703809746443833
RMSE 29: 3.770082498613852
RMSE 30: 3.7697890473708258
RMSE 31: 3.769500505214647
RMSE 32: 3.7692167596032156
RMSE 33: 3.7689377010402176
RMSE 34: 3.7686632229668904
RMSE 35: 3.768393221657912
RMSE 36: 3.768127

RMSE 292: 3.597374835278699
RMSE 293: 3.5933820573273723
RMSE 294: 3.589300860849821
RMSE 295: 3.5851300873193326
RMSE 296: 3.580868605701108
RMSE 297: 3.5765153150059366
RMSE 298: 3.572069146901871
RMSE 299: 3.567529068379361
RMSE 300: 3.5628940844646952
RMSE 301: 3.558163240975918
RMSE 302: 3.5533356273152092
RMSE 303: 3.5484103792906994
RMSE 304: 3.543386681960543
RMSE 305: 3.538263772491331
RMSE 306: 3.5330409430223186
RMSE 307: 3.527717543526613
RMSE 308: 3.5222929846597326
RMSE 309: 3.5167667405855316
RMSE 310: 3.5111383517691444
RMSE 311: 3.505407427725804
RMSE 312: 3.4995736497143937
RMSE 313: 3.4936367733638845
RMSE 314: 3.487596631220697
RMSE 315: 3.4814531352045344
RMSE 316: 3.4752062789604827
RMSE 317: 3.4688561400941613
RMSE 318: 3.462402882277772
RMSE 319: 3.4558467572137572
RMSE 320: 3.449188106443756
RMSE 321: 3.442427362990062
RMSE 322: 3.435565052817412
RMSE 323: 3.4286017961032385
RMSE 324: 3.421538308304631
RMSE 325: 3.4143754010113097
RMSE 326: 3.407113982574056
RM

RMSE 579: 1.8880357538752564
RMSE 580: 1.8857173185585
RMSE 581: 1.883414537263552
RMSE 582: 1.8811272832512267
RMSE 583: 1.8788554309555994
RMSE 584: 1.8765988559727655
RMSE 585: 1.8743574350496601
RMSE 586: 1.8721310460729605
RMSE 587: 1.8699195680581076
RMSE 588: 1.867722881138364
RMSE 589: 1.8655408665540107
RMSE 590: 1.8633734066415872
RMSE 591: 1.8612203848232483
RMSE 592: 1.8590816855961818
RMSE 593: 1.8569571945221635
RMSE 594: 1.8548467982171177
RMSE 595: 1.8527503843408704
RMSE 596: 1.850667841586873
RMSE 597: 1.8485990596721207
RMSE 598: 1.846543929327099
RMSE 599: 1.8445023422858198
RMSE 600: 1.8424741912759643
RMSE 601: 1.840459370009116
RMSE 602: 1.8384577731710499
RMSE 603: 1.8364692964121487
RMSE 604: 1.8344938363378596
RMSE 605: 1.8325312904993085
RMSE 606: 1.8305815573839066
RMSE 607: 1.8286445364061217
RMSE 608: 1.8267201278982967
RMSE 609: 1.824808233101567
RMSE 610: 1.8229087541568592
RMSE 611: 1.8210215940959464
RMSE 612: 1.8191466568326737
RMSE 613: 1.81728384715

RMSE 865: 1.5748663920924844
RMSE 866: 1.5744182227695813
RMSE 867: 1.5739722775631826
RMSE 868: 1.5735285437011706
RMSE 869: 1.5730870084969257
RMSE 870: 1.572647659348672
RMSE 871: 1.5722104837388098
RMSE 872: 1.5717754692332708
RMSE 873: 1.5713426034808593
RMSE 874: 1.5709118742126174
RMSE 875: 1.5704832692411774
RMSE 876: 1.5700567764601585
RMSE 877: 1.5696323838434942
RMSE 878: 1.569210079444875
RMSE 879: 1.568789851397068
RMSE 880: 1.5683716879113623
RMSE 881: 1.5679555772769305
RMSE 882: 1.5675415078602515
RMSE 883: 1.5671294681045045
RMSE 884: 1.5667194465289884
RMSE 885: 1.5663114317285378
RMSE 886: 1.5659054123729512
RMSE 887: 1.5655013772063984
RMSE 888: 1.5650993150468826
RMSE 889: 1.5646992147856595
RMSE 890: 1.564301065386678
RMSE 891: 1.5639048558860438
RMSE 892: 1.5635105753914529
RMSE 893: 1.5631182130816679
RMSE 894: 1.5627277582059593
RMSE 895: 1.5623392000835865
RMSE 896: 1.5619525281032691
RMSE 897: 1.5615677317226577
RMSE 898: 1.561184800467809
RMSE 899: 1.5608037

RMSE 1145: 1.5049442212264172
RMSE 1146: 1.5048185609643483
RMSE 1147: 1.5046934263752485
RMSE 1148: 1.504568815010337
RMSE 1149: 1.504444724433661
RMSE 1150: 1.504321152222002
RMSE 1151: 1.5041980959648245
RMSE 1152: 1.5040755532641754
RMSE 1153: 1.5039535217346238
RMSE 1154: 1.5038319990031928
RMSE 1155: 1.5037109827092656
RMSE 1156: 1.5035904705045378
RMSE 1157: 1.503470460052918
RMSE 1158: 1.503350949030479
RMSE 1159: 1.50323193512538
RMSE 1160: 1.5031134160377826
RMSE 1161: 1.5029953894797996
RMSE 1162: 1.5028778531754137
RMSE 1163: 1.5027608048603998
RMSE 1164: 1.502644242282286
RMSE 1165: 1.5025281632002438
RMSE 1166: 1.5024125653850589
RMSE 1167: 1.5022974466190302
RMSE 1168: 1.502182804695933
RMSE 1169: 1.5020686374209213
RMSE 1170: 1.5019549426104855
RMSE 1171: 1.5018417180923815
RMSE 1172: 1.5017289617055527
RMSE 1173: 1.5016166713000798
RMSE 1174: 1.5015048447371067
RMSE 1175: 1.5013934798887822
RMSE 1176: 1.5012825746381917
RMSE 1177: 1.5011721268792877
RMSE 1178: 1.501062

RMSE 1421: 1.4840080524781227
RMSE 1422: 1.4839658903090056
RMSE 1423: 1.483923886064191
RMSE 1424: 1.483882039098493
RMSE 1425: 1.4838403487696288
RMSE 1426: 1.4837988144381749
RMSE 1427: 1.4837574354675724
RMSE 1428: 1.4837162112241016
RMSE 1429: 1.4836751410768738
RMSE 1430: 1.4836342243978144
RMSE 1431: 1.4835934605616496
RMSE 1432: 1.4835528489458967
RMSE 1433: 1.4835123889308388
RMSE 1434: 1.4834720798995251
RMSE 1435: 1.4834319212377547
RMSE 1436: 1.4833919123340498
RMSE 1437: 1.4833520525796566
RMSE 1438: 1.4833123413685316
RMSE 1439: 1.4832727780973232
RMSE 1440: 1.4832333621653504
RMSE 1441: 1.4831940929746095
RMSE 1442: 1.4831549699297482
RMSE 1443: 1.4831159924380481
RMSE 1444: 1.4830771599094272
RMSE 1445: 1.4830384717564136
RMSE 1446: 1.4829999273941359
RMSE 1447: 1.482961526240316
RMSE 1448: 1.4829232677152508
RMSE 1449: 1.482885151241799
RMSE 1450: 1.4828471762453745
RMSE 1451: 1.4828093421539281
RMSE 1452: 1.4827716483979396
RMSE 1453: 1.4827340944103986
RMSE 1454: 1.4

In [9]:
from numpy import savetxt
from pathlib import Path

Path("./lf_reg/").mkdir(parents=True, exist_ok=True)


savetxt("lf_reg/Q.csv", Q)
savetxt("lf_reg/P.csv", P)
savetxt("lf_reg/objectives.csv", regularization_learning_curve)

In [None]:
#### CREATE SUBMISSION ####
predictions_description = pd.read_csv(predictions_file, delimiter=';', names=['userID', 'movieID'], header=None)
submission = []
R_pred = np.matmul(Q, P)
for i, [user,movie] in enumerate(predictions_description.values):
    submission.append([i+1, R_pred[movie-1,user-1]])

submission_df = pd.DataFrame(submission, columns=["Id", "Rating"])
submission_df.to_csv("lf_reg/submission.csv", index=False)