In [1]:
import numpy as np
import pandas as pd
import os.path
from random import randint

ratings_file = './data/ratings.csv'
predictions_file = './data/predictions.csv'

ratings_description = pd.read_csv(ratings_file, delimiter=';',
                                  dtype={'userID': 'int', 'movieID': 'int', 'rating': 'int'},
                                  names=['userID', 'movieID', 'rating'])

num_movies = max(ratings_description["movieID"])
num_users = max(ratings_description["userID"])
R = np.zeros(( num_movies, num_users))

for user, movie, rating in ratings_description.values:
    R[movie-1, user-1] = rating

R[R==0] = np.nan
print(f"Shape of Utility matrix is (movies, users): {R.shape}")
R

Shape of Utility matrix is (movies, users): (3706, 6040)


array([[ 5., nan, nan, ..., nan, nan,  3.],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])

In [6]:
from tqdm import tqdm_notebook as tqdm

EPOCHS = 1500
LEARNING_RATE = 0.05 # == nu
LAMBDA = 0.01
K = 2 # number of factors to work with.

np.random.seed(42)
Q = np.random.uniform(-1, 1, (R.shape[0], K))
P = np.random.uniform(-1, 1, (K, R.shape[1]))
div = (R.shape[0] * R.shape[1]) - np.isnan(R).sum()
RMSE = np.sqrt(((np.nan_to_num(R - np.matmul(Q, P), 0)**2).sum())/div)
print(f"Starting RMSE: {RMSE}")

regularization_learning_curve = []

for epoch in tqdm(range(1500)):
    R_pred = np.matmul(Q,P)
    curr_error = np.nan_to_num(R - R_pred, 0)
    Q_update = np.zeros(Q.shape)
    for i in range(len(Q_update)):
        for curr_k in range(K):
            Q_delta =(-2 * np.dot(P[curr_k, :], curr_error[i]))/np.isnan(R[i]).sum()
            Q_update[i, curr_k] = LEARNING_RATE * (Q_delta + LAMBDA*Q[i, curr_k])

    P_update = np.zeros(P.shape)
    for i in range(P_update.shape[1]):
        for curr_k in range(K):
            P_delta =(-2 * np.dot(Q[:, curr_k], curr_error[:, i]))/np.isnan(R[:, i]).sum()
            P_update[curr_k, i] = LEARNING_RATE * (P_delta + LAMBDA*P[curr_k, i])

    Q -= Q_update
    P -= P_update
    
    RMSE_i = np.sqrt(((np.nan_to_num(R - np.matmul(Q, P), 0)**2).sum())/div)
    print(f"RMSE {epoch}: {RMSE_i}")
    regularization_learning_curve.append([epoch, RMSE_i])


RMSE = np.sqrt(((np.nan_to_num(R - np.matmul(Q, P), 0)**2).sum())/div)
print(f"Final RMSE: {RMSE}")

Starting RMSE: 3.781830053581588


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for epoch in tqdm(range(1500)):


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1500.0), HTML(value='')))

RMSE 0: 3.7810569863061056
RMSE 1: 3.7803143482127624
RMSE 2: 3.779600254864564
RMSE 3: 3.7789129704709286
RMSE 4: 3.778250893695214
RMSE 5: 3.777612544956513
RMSE 6: 3.7769965550515123
RMSE 7: 3.7764016549438972
RMSE 8: 3.775826666587716
RMSE 9: 3.7752704946672684
RMSE 10: 3.7747321191502476
RMSE 11: 3.7742105885634096
RMSE 12: 3.7737050139101416
RMSE 13: 3.7732145631594074
RMSE 14: 3.7727384562430437
RMSE 15: 3.7722759605057528
RMSE 16: 3.7718263865583967
RMSE 17: 3.7713890844906057
RMSE 18: 3.7709634404033237
RMSE 19: 3.770548873226374
RMSE 20: 3.7701448317896378
RMSE 21: 3.7697507921195914
RMSE 22: 3.769366254935952
RMSE 23: 3.7689907433255794
RMSE 24: 3.7686238005728576
RMSE 25: 3.7682649881278585
RMSE 26: 3.7679138836952064
RMSE 27: 3.767570079427781
RMSE 28: 3.767233180211209
RMSE 29: 3.7669028020254043
RMSE 30: 3.7665785703712467
RMSE 31: 3.766260118750528
RMSE 32: 3.765947087188441
RMSE 33: 3.7656391207883746
RMSE 34: 3.7653358683090308
RMSE 35: 3.7650369807546875
RMSE 36: 3.7

RMSE 290: 1.3591045088586962
RMSE 291: 1.3557129118222715
RMSE 292: 1.3523605188838517
RMSE 293: 1.3490467232823815
RMSE 294: 1.3457709300497775
RMSE 295: 1.342532555739377
RMSE 296: 1.339331028161372
RMSE 297: 1.336165786125044
RMSE 298: 1.333036279187646
RMSE 299: 1.329941967409705
RMSE 300: 1.3268823211166212
RMSE 301: 1.3238568206663381
RMSE 302: 1.3208649562229728
RMSE 303: 1.3179062275362
RMSE 304: 1.3149801437262567
RMSE 305: 1.312086223074425
RMSE 306: 1.309223992818795
RMSE 307: 1.3063929889552262
RMSE 308: 1.3035927560433156
RMSE 309: 1.30082284701728
RMSE 310: 1.2980828230015713
RMSE 311: 1.2953722531311314
RMSE 312: 1.2926907143761444
RMSE 313: 1.2900377913711794
RMSE 314: 1.2874130762485765
RMSE 315: 1.2848161684759838
RMSE 316: 1.2822466746979309
RMSE 317: 1.279704208581327
RMSE 318: 1.2771883906647579
RMSE 319: 1.2746988482115058
RMSE 320: 1.2722352150662015
RMSE 321: 1.269797131514943
RMSE 322: 1.2673842441488905
RMSE 323: 1.2649962057311337
RMSE 324: 1.262632675066859


RMSE 576: 1.0190555055352626
RMSE 577: 1.0186999756985025
RMSE 578: 1.0183464533090778
RMSE 579: 1.0179949235977894
RMSE 580: 1.017645371928985
RMSE 581: 1.0172977837991253
RMSE 582: 1.0169521448353587
RMSE 583: 1.016608440794124
RMSE 584: 1.0162666575597674
RMSE 585: 1.0159267811431685
RMSE 586: 1.0155887976804001
RMSE 587: 1.0152526934313884
RMSE 588: 1.0149184547785972
RMSE 589: 1.014586068225731
RMSE 590: 1.0142555203964472
RMSE 591: 1.0139267980330913
RMSE 592: 1.0135998879954375
RMSE 593: 1.0132747772594608
RMSE 594: 1.0129514529161057
RMSE 595: 1.0126299021700846
RMSE 596: 1.0123101123386886
RMSE 597: 1.0119920708505965
RMSE 598: 1.0116757652447248
RMSE 599: 1.0113611831690712
RMSE 600: 1.0110483123795848
RMSE 601: 1.0107371407390344
RMSE 602: 1.0104276562159116
RMSE 603: 1.010119846883326
RMSE 604: 1.0098137009179349
RMSE 605: 1.009509206598861
RMSE 606: 1.0092063523066515
RMSE 607: 1.0089051265222189
RMSE 608: 1.008605517825825
RMSE 609: 1.0083075148960512
RMSE 610: 1.00801110

RMSE 861: 0.9648583474776129
RMSE 862: 0.9647623079886576
RMSE 863: 0.9646666277128391
RMSE 864: 0.9645713048544006
RMSE 865: 0.9644763376284827
RMSE 866: 0.9643817242610431
RMSE 867: 0.9642874629887793
RMSE 868: 0.9641935520590502
RMSE 869: 0.9640999897297982
RMSE 870: 0.9640067742694741
RMSE 871: 0.9639139039569661
RMSE 872: 0.9638213770815146
RMSE 873: 0.9637291919426465
RMSE 874: 0.9636373468500973
RMSE 875: 0.9635458401237379
RMSE 876: 0.9634546700935102
RMSE 877: 0.9633638350993411
RMSE 878: 0.9632733334910838
RMSE 879: 0.963183163628439
RMSE 880: 0.9630933238808865
RMSE 881: 0.9630038126276207
RMSE 882: 0.9629146282574768
RMSE 883: 0.9628257691688596
RMSE 884: 0.9627372337696845
RMSE 885: 0.9626490204773023
RMSE 886: 0.9625611277184327
RMSE 887: 0.9624735539291106
RMSE 888: 0.9623862975545986
RMSE 889: 0.9622993570493386
RMSE 890: 0.9622127308768834
RMSE 891: 0.9621264175098335
RMSE 892: 0.9620404154297638
RMSE 893: 0.9619547231271763
RMSE 894: 0.9618693391014292
RMSE 895: 0.961

RMSE 1140: 0.9474829216506271
RMSE 1141: 0.9474432273516391
RMSE 1142: 0.9474036376080253
RMSE 1143: 0.9473641520050494
RMSE 1144: 0.9473247701298412
RMSE 1145: 0.9472854915714233
RMSE 1146: 0.94724631592067
RMSE 1147: 0.9472072427703098
RMSE 1148: 0.9471682717149165
RMSE 1149: 0.9471294023509006
RMSE 1150: 0.9470906342764885
RMSE 1151: 0.9470519670917268
RMSE 1152: 0.9470134003984615
RMSE 1153: 0.9469749338003338
RMSE 1154: 0.9469365669027718
RMSE 1155: 0.9468982993129739
RMSE 1156: 0.9468601306399039
RMSE 1157: 0.9468220604942871
RMSE 1158: 0.946784088488585
RMSE 1159: 0.9467462142370082
RMSE 1160: 0.9467084373554857
RMSE 1161: 0.9466707574616672
RMSE 1162: 0.9466331741749097
RMSE 1163: 0.9465956871162757
RMSE 1164: 0.9465582959085125
RMSE 1165: 0.9465210001760538
RMSE 1166: 0.9464837995450026
RMSE 1167: 0.9464466936431267
RMSE 1168: 0.94640968209985
RMSE 1169: 0.9463727645462452
RMSE 1170: 0.9463359406150165
RMSE 1171: 0.9462992099405029
RMSE 1172: 0.9462625721586575
RMSE 1173: 0.94

RMSE 1415: 0.9394230907042115
RMSE 1416: 0.9394012177515174
RMSE 1417: 0.9393793820758369
RMSE 1418: 0.939357583543339
RMSE 1419: 0.9393358220207095
RMSE 1420: 0.9393140973751339
RMSE 1421: 0.9392924094743101
RMSE 1422: 0.939270758186433
RMSE 1423: 0.9392491433802042
RMSE 1424: 0.9392275649248217
RMSE 1425: 0.9392060226899794
RMSE 1426: 0.9391845165458664
RMSE 1427: 0.9391630463631677
RMSE 1428: 0.9391416120130595
RMSE 1429: 0.9391202133672047
RMSE 1430: 0.9390988502977541
RMSE 1431: 0.9390775226773448
RMSE 1432: 0.9390562303790967
RMSE 1433: 0.9390349732766102
RMSE 1434: 0.9390137512439692
RMSE 1435: 0.9389925641557308
RMSE 1436: 0.9389714118869273
RMSE 1437: 0.9389502943130681
RMSE 1438: 0.9389292113101355
RMSE 1439: 0.9389081627545772
RMSE 1440: 0.9388871485233128
RMSE 1441: 0.9388661684937237
RMSE 1442: 0.9388452225436664
RMSE 1443: 0.9388243105514446
RMSE 1444: 0.938803432395836
RMSE 1445: 0.938782587956071
RMSE 1446: 0.9387617771118397
RMSE 1447: 0.9387409997432858
RMSE 1448: 0.9