In [None]:
!pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)
[K     |████████████████████████████████| 11.8 MB 5.0 MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1619412 sha256=021bed3524e87db7fa1c510ad31853aa67c8e0ba07cca46cebc699c4db48392a
  Stored in directory: /root/.cache/pip/wheels/76/44/74/b498c42be47b2406bd27994e16c5188e337c657025ab400c1c
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.1 surprise-0.1


## Loading Libraries

In [None]:
import numpy as np
import pandas as pd
from scipy.sparse.linalg import svds
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate
from surprise.model_selection import KFold

## Utilities

In [None]:
# Folder holding the preprocessed MovieLens CSVs. The path is relative and the
# trailing slash matters: file names are appended to it by plain string concatenation.
# NOTE(review): looks like a mounted Google Drive location — confirm before running elsewhere.
dir_path = 'drive/MyDrive/ADM/movielens/'

In [None]:
def movie_rec(y_bar, user_id, movies, y, num):
  """Recommend unseen movies for one user from a predicted-ratings matrix.

  Args:
    y_bar: DataFrame of predicted ratings, one row per user and one column
      per movie id. Rows are addressed by position: the row used for
      `user_id` is row `user_id - 1`, whatever the index labels are.
    user_id: 1-based user id.
    movies: DataFrame with a 'movie_id' column plus movie metadata.
    y: DataFrame of observed ratings with 'user_id', 'movie_id' and
      'rating' columns.
    num: maximum number of recommendations to return.

  Returns:
    A `(merged, res)` tuple. `merged` holds the user's observed ratings
    joined with the movie metadata, highest rating first. `res` holds up to
    `num` rows of `movies` that the user has not rated, ordered by predicted
    rating (highest first); the prediction column itself is not included.

  Raises:
    IndexError: if `user_id` does not map to a row of `y_bar`.
  """
  user_row = user_id - 1
  # Without this check a non-positive user_id wraps around (iloc[-1] is the
  # last row) and silently yields another user's predictions.
  if not 0 <= user_row < len(y_bar):
    raise IndexError(f'user_id {user_id} is outside the valid range 1..{len(y_bar)}')

  # Name the series and its axis explicitly so the merge below does not depend
  # on how y_bar's index or columns happen to be labelled.
  preds = (
      y_bar.iloc[user_row]
      .rename('Predictions')
      .rename_axis('movie_id')
      .reset_index()
  )

  user_data = y[y.user_id == user_id]
  merged = user_data.merge(movies, how='left', on='movie_id').sort_values(['rating'], ascending=False)

  unseen = movies[~movies['movie_id'].isin(merged['movie_id'])]
  res = (
      unseen.merge(preds, how='left', on='movie_id')
      .sort_values('Predictions', ascending=False)  # movies without a prediction (NaN) sort last
      .iloc[:num]
      .drop(columns='Predictions')
  )

  return merged, res

## Loading Preprocessed Dataset

In [None]:
# Read the three preprocessed tables; each file carries an extra
# 'Unnamed: 0' column, which is discarded right after loading.
ratings_data = pd.read_csv(dir_path + 'ratings_1.csv', encoding='latin-1').drop(columns=['Unnamed: 0'])
users_data = pd.read_csv(dir_path + 'users_1.csv', encoding='latin-1').drop(columns=['Unnamed: 0'])
movies_data = pd.read_csv(dir_path + 'movies_1.csv', encoding='latin-1').drop(columns=['Unnamed: 0'])

In [None]:
# Largest user / movie id that appears in the ratings. These equal the number
# of distinct users / movies only when the ids are contiguous starting at 1.
num_users = ratings_data['user_id'].max()
num_items = ratings_data['movie_id'].max()

In [None]:
# Preview the first rows of the ratings table.
ratings_data.head()

Unnamed: 0,user_id,movie_id,rating,timestamp,user_embeded_id,movie_embeded_id
0,1,1193,5,978300760,0,1192
1,1,661,3,978302109,0,660
2,1,914,3,978301968,0,913
3,1,3408,4,978300275,0,3407
4,1,2355,5,978824291,0,2354


In [None]:
# Preview the first rows of the users table.
users_data.head()

Unnamed: 0,user_id,gender,age,occupation,zipcode,age_enc,occ_enc
0,1,F,1,10,48067,Under 18,K-12 student
1,2,M,56,16,70072,56+,self-employed
2,3,M,25,15,55117,25-34,scientist
3,4,M,45,7,2460,45-49,executive/managerial
4,5,M,25,20,55455,25-34,writer


In [None]:
# Preview the first rows of the movies table.
movies_data.head()

Unnamed: 0,movie_id,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [None]:
# Reshape the long ratings table into a wide user x movie matrix
# (one row per user_id, one column per movie_id); pairs without a rating become 0.
ratings = ratings_data.pivot(index='user_id', columns='movie_id', values='rating')
ratings = ratings.fillna(0)
ratings

movie_id,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,3913,3914,3915,3916,3917,3918,3919,3920,3921,3922,3923,3924,3925,3926,3927,3928,3929,3930,3931,3932,3933,3934,3935,3936,3937,3938,3939,3940,3941,3942,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,4.0,0.0,4.0,0.0,3.0,0.0,0.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,4.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,4.0,3.0,0.0,4.0,3.0,4.0,0.0,3.0,0.0,5.0,0.0,0.0,0.0,0.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6038,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6039,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Mean-centre the rating matrix: subtract each user's row mean from that row.
# The mean is taken over every column, so the zeros that stand in for
# unrated movies count towards it.
rating_matrix = ratings.to_numpy()
mean_rating = rating_matrix.mean(axis=1)
rating_matrix = rating_matrix - mean_rating[:, np.newaxis]

In [None]:
# Inspect the mean-centred matrix.
rating_matrix

array([[ 4.94009714, -0.05990286, -0.05990286, ..., -0.05990286,
        -0.05990286, -0.05990286],
       [-0.12924987, -0.12924987, -0.12924987, ..., -0.12924987,
        -0.12924987, -0.12924987],
       [-0.05369671, -0.05369671, -0.05369671, ..., -0.05369671,
        -0.05369671, -0.05369671],
       ...,
       [-0.02050729, -0.02050729, -0.02050729, ..., -0.02050729,
        -0.02050729, -0.02050729],
       [-0.1287102 , -0.1287102 , -0.1287102 , ..., -0.1287102 ,
        -0.1287102 , -0.1287102 ],
       [ 2.6708041 , -0.3291959 , -0.3291959 , ..., -0.3291959 ,
        -0.3291959 , -0.3291959 ]])

## SVD - Singular Value Decomposition

In [None]:
# Truncated SVD of the mean-centred matrix, keeping k = 50 singular values.
# `mu` receives the left singular vectors (the factor usually written U),
# `sigma` the singular values as a 1-D array, and `vt` the right singular vectors.
mu, sigma, vt = svds(rating_matrix, k = 50)

In [None]:
# Expand the 1-D array of singular values into a diagonal matrix so it can be
# used in the matrix products that rebuild the predictions.
# The ndim guard keeps this cell idempotent: np.diag applied to a matrix that
# is already 2-D would extract its diagonal and undo the expansion on a re-run.
if sigma.ndim == 1:
  sigma = np.diag(sigma)
sigma

array([[ 147.18581225,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,  147.62154312,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,  148.58855276, ...,    0.        ,
           0.        ,    0.        ],
       ...,
       [   0.        ,    0.        ,    0.        , ...,  574.46932602,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
         670.41536276,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        , 1544.10679346]])

In [None]:
# Rebuild the low-rank approximation from the three factors and add each
# user's mean back to return to the original rating offset.
ratings_pred = mu @ sigma @ vt + mean_rating[:, np.newaxis]
ratings_pred

array([[ 4.28886061,  0.14305516, -0.1950795 , ...,  0.03191195,
         0.05044975,  0.08891033],
       [ 0.74471587,  0.16965927,  0.33541808, ..., -0.10110207,
        -0.0540982 , -0.14018846],
       [ 1.81882382,  0.45613623,  0.09097801, ...,  0.01234452,
         0.01514752, -0.10995596],
       ...,
       [ 0.61908871, -0.16176859,  0.10673806, ..., -0.01336948,
        -0.0303543 , -0.11493552],
       [ 1.50360483, -0.03620761, -0.16126817, ..., -0.01090407,
        -0.03864749, -0.16835943],
       [ 1.99624816, -0.18598715, -0.1564782 , ..., -0.00664061,
         0.12706713,  0.28500112]])

In [None]:
# Wrap the predictions in a DataFrame that reuses the movie_id columns of `ratings`.
# No index is passed, so rows get a default 0-based index (row i corresponds to
# the i-th row of `ratings`) rather than user_id labels.
pred_df = pd.DataFrame(ratings_pred, columns = ratings.columns)
pred_df

movie_id,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,3913,3914,3915,3916,3917,3918,3919,3920,3921,3922,3923,3924,3925,3926,3927,3928,3929,3930,3931,3932,3933,3934,3935,3936,3937,3938,3939,3940,3941,3942,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
0,4.288861,0.143055,-0.195080,-0.018843,0.012232,-0.176604,-0.074120,0.141358,-0.059553,-0.195950,0.512867,-0.089172,0.310181,-0.002005,-0.052401,-0.189827,0.238360,0.006466,-0.099315,-0.069682,-0.321492,0.111577,0.034795,0.320576,-0.118217,-0.012647,0.065573,-0.098318,0.064081,-0.005914,0.091936,0.180563,-0.009566,2.641693,-0.012495,0.765179,0.019784,0.002917,0.053079,0.014856,...,0.018810,-0.018782,0.022249,0.227852,-0.067653,-0.046039,-0.023574,-0.019405,-0.005116,-0.032921,-0.008259,-0.019157,0.007527,-0.008687,-0.025630,-0.013563,0.015240,-0.044665,-0.009568,-0.043549,-0.003131,-0.008221,-0.005948,0.031885,-0.003424,-0.001159,-0.002124,-0.002827,0.010393,-0.001068,0.027807,0.001640,0.026395,-0.022024,-0.085415,0.403529,0.105579,0.031912,0.050450,0.088910
1,0.744716,0.169659,0.335418,0.000758,0.022475,1.353050,0.051426,0.071258,0.161601,1.567246,0.772656,0.046179,-0.054562,0.042344,0.048390,0.347313,1.074905,-0.099782,0.008163,0.250869,2.186638,0.018789,-0.002199,0.218934,0.824475,0.139274,-0.007135,0.053071,-0.156952,0.044739,-0.002960,0.453298,-0.007484,0.920325,0.016566,1.335129,-0.015066,-0.045602,0.034649,0.122010,...,-0.042363,-0.137822,-0.112071,0.380783,-0.036273,-0.016174,0.002920,-0.148021,-0.017614,-0.033474,0.086133,0.008153,-0.126819,0.109208,0.001798,0.151866,0.014118,0.032897,0.005764,0.042259,0.022404,0.003260,0.010556,0.137181,-0.042184,0.006759,-0.005789,0.000340,0.002024,0.016013,-0.056502,-0.013733,-0.010580,0.062576,-0.016248,0.155790,-0.418737,-0.101102,-0.054098,-0.140188
2,1.818824,0.456136,0.090978,-0.043037,-0.025694,-0.158617,-0.131778,0.098977,0.030551,0.735470,-0.023476,0.034796,0.065942,0.008661,0.110348,-0.002952,-0.122061,0.063974,0.061033,0.081799,0.329471,0.149579,0.095352,-0.161493,0.022545,-0.009284,-0.002677,-0.142710,0.012345,-0.085331,0.076139,-0.355795,-0.008579,1.046871,-0.088946,0.383583,-0.018144,-0.038618,0.113984,0.006942,...,0.007233,-0.047221,0.066474,-0.179455,0.097428,0.034113,0.008098,-0.024784,-0.012749,-0.007394,-0.017220,0.004719,0.113348,-0.074943,-0.145795,0.128619,0.112567,0.045500,-0.018027,-0.058946,-0.002770,-0.035276,-0.008085,0.132182,-0.017005,0.014383,0.006598,-0.006217,-0.000342,0.000518,0.040481,-0.005301,0.012832,0.029349,0.020866,0.121532,0.076205,0.012345,0.015148,-0.109956
3,0.408057,-0.072960,0.039642,0.089363,0.041950,0.237753,-0.049426,0.009467,0.045469,-0.111370,-0.375831,0.068658,0.011199,0.069699,-0.037529,-0.238788,0.060607,-0.043418,0.053152,0.078237,0.357185,-0.096005,-0.028243,-0.067169,0.246164,-0.020379,0.034461,-0.022225,-0.012327,0.009182,0.014730,0.215893,-0.019687,-0.293933,-0.011511,0.145326,-0.029213,0.030029,-0.045409,-0.030684,...,-0.015077,-0.030208,0.028357,-0.072643,-0.135727,-0.053318,-0.012962,-0.054465,0.005870,-0.018048,-0.006836,-0.008222,-0.027214,-0.071677,-0.094072,-0.010745,-0.103191,-0.031297,-0.023920,-0.015053,-0.017914,-0.029561,-0.024299,-0.057678,-0.111450,-0.015473,-0.007123,-0.007416,-0.011508,-0.010038,0.008571,-0.005425,-0.008500,-0.003417,-0.083982,0.094512,0.057557,-0.026050,0.014841,-0.034224
4,1.574272,0.021239,-0.051300,0.246884,-0.032406,1.552281,-0.199630,-0.014920,-0.060498,0.450512,-0.251178,0.012337,-0.084051,0.258937,0.016570,0.980536,1.267869,0.275619,-0.008139,-0.038832,1.849627,0.107649,-0.168424,0.386541,1.790343,0.192379,-0.054356,0.267566,1.027817,0.374665,-0.010445,1.947980,0.017468,2.784035,0.274397,1.422393,0.040553,0.022926,1.345800,0.104507,...,0.075475,0.330767,0.150470,-0.261636,0.085163,-0.014229,-0.029247,0.124172,0.092875,0.061895,0.034757,0.054386,0.047055,0.048403,0.082926,0.129035,-0.174646,0.102727,0.024732,0.047280,0.017818,0.041451,0.041595,-0.007138,-0.080448,0.018639,0.034068,0.026941,0.035905,0.024459,0.110151,0.046010,0.006934,-0.015940,-0.050080,-0.052539,0.507189,0.033830,0.125706,0.199244
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6035,2.392388,0.233964,0.413676,0.443726,-0.083641,2.192294,1.168936,0.145237,-0.046551,0.560895,2.239887,0.260693,0.457274,0.877058,-0.099574,2.249322,4.625192,0.224932,-0.185970,-0.032786,3.496632,1.188352,-0.069510,2.588398,2.669976,0.745047,0.190223,1.592740,2.578493,0.882242,0.201075,4.827702,-0.033057,3.352914,0.971525,3.675300,-0.007350,-0.100811,2.241727,0.371081,...,0.051894,0.174457,0.234864,-0.390962,0.493462,0.350091,0.184057,0.638891,0.171899,0.120872,0.456743,0.128194,0.778545,0.766031,1.564893,0.570338,0.557686,1.021153,0.139517,1.522876,0.075356,0.184025,0.203270,0.299414,0.568656,0.001079,0.040538,0.006475,0.028787,-0.001650,0.188493,-0.004439,-0.042271,-0.090101,0.276312,0.133806,0.732374,0.271234,0.244983,0.734771
6036,2.070760,0.139294,-0.012666,-0.176990,0.261243,1.074234,0.083999,0.013814,-0.030179,-0.084956,0.240129,0.113375,0.056177,-0.133446,-0.023538,0.704419,1.044988,0.035459,0.034388,-0.070357,0.039834,0.361462,0.108246,0.450324,0.668073,0.001098,0.001786,0.386045,0.242951,0.119038,-0.095362,2.641308,0.014731,3.733774,-0.039311,1.303293,-0.017777,0.030437,0.746487,0.018355,...,-0.066030,0.021357,-0.311375,-0.147729,0.075403,0.023405,0.046904,-0.021925,0.041731,0.023555,0.075101,0.011760,0.094490,0.112535,0.328443,0.059411,0.392990,0.113704,0.048713,0.329079,0.009256,0.025579,0.034883,0.050169,0.138306,-0.002727,-0.011607,0.003313,-0.013968,-0.015826,-0.161548,0.001184,-0.029223,-0.047087,0.099036,-0.192653,-0.091265,0.050798,-0.113427,0.033283
6037,0.619089,-0.161769,0.106738,0.007048,-0.074701,-0.079953,0.100220,-0.034013,0.007671,0.001280,0.182847,0.008752,-0.024851,-0.020687,-0.032327,0.087463,0.569903,0.019525,0.009234,-0.001446,0.005926,0.077055,-0.005642,-0.037568,0.034693,0.005470,-0.013814,0.268274,0.053833,0.023694,-0.017316,-0.059949,0.001826,0.316607,0.081318,-0.086490,0.007719,-0.013195,0.200490,-0.003172,...,-0.005348,-0.015400,-0.069972,-0.136648,-0.080802,-0.026776,0.003162,0.004596,-0.003947,0.007320,-0.024035,-0.000905,-0.029746,-0.035526,-0.056962,-0.009902,-0.016429,-0.049903,0.004288,-0.052947,-0.002821,-0.000025,0.000775,-0.015526,-0.077473,0.003093,-0.000203,0.004458,0.004425,0.008262,-0.053546,0.005835,0.007551,-0.024082,-0.010739,-0.008863,-0.099774,-0.013369,-0.030354,-0.114936
6038,1.503605,-0.036208,-0.161268,-0.083401,-0.081617,-0.143517,0.106668,-0.054404,-0.008826,0.205801,0.328057,0.038232,-0.028300,0.066413,0.069179,0.157424,1.014637,0.133620,-0.040673,-0.024593,0.134281,0.176887,0.142789,-0.165628,-0.331601,0.060515,-0.017799,0.700771,0.004748,-0.096274,0.051362,0.370183,0.010575,1.195629,0.057552,-0.281182,0.013472,-0.056712,0.966482,-0.001735,...,0.008669,0.004973,-0.113144,-0.099504,-0.110375,-0.065534,-0.034778,0.071899,0.044740,0.026308,-0.090400,0.043307,0.085520,0.056140,0.249134,0.160086,0.539439,0.042647,0.003240,0.173416,0.005727,-0.055664,0.002802,0.291708,-0.133167,-0.014116,-0.018968,-0.010119,-0.024114,-0.005999,-0.006104,0.008933,0.007595,-0.037800,0.050743,0.024052,-0.172466,-0.010904,-0.038647,-0.168359


## Testing

In [None]:
# For user 1310: the movies they have already rated, and up to 20 unseen
# movies ranked by predicted rating.
already_rated, new_preds = movie_rec(pred_df, 1310, movies_data, ratings_data, 20)

In [None]:
# Movies user 1310 has rated, highest rating first.
already_rated

Unnamed: 0,user_id,movie_id,rating,timestamp,user_embeded_id,movie_embeded_id,title,genres
5,1310,2248,5,974781573,1309,2247,Say Anything... (1989),Comedy|Drama|Romance
6,1310,2620,5,974781573,1309,2619,This Is My Father (1998),Drama|Romance
7,1310,3683,5,974781935,1309,3682,Blood Simple (1984),Drama|Film-Noir
15,1310,1704,5,974781573,1309,1703,Good Will Hunting (1997),Drama
1,1310,1293,5,974781839,1309,1292,Gandhi (1982),Drama
12,1310,3101,4,974781573,1309,3100,Fatal Attraction (1987),Thriller
11,1310,1343,4,974781534,1309,1342,Cape Fear (1991),Thriller
20,1310,2000,4,974781892,1309,1999,Lethal Weapon (1987),Action|Comedy|Crime|Drama
18,1310,3526,4,974781892,1309,3525,Parenthood (1989),Comedy|Drama
17,1310,3360,4,974781935,1309,3359,Hoosiers (1986),Drama


In [None]:
# Recommended (not yet rated) movies for user 1310, best predicted first.
new_preds

Unnamed: 0,movie_id,title,genres
1618,1674,Witness (1985),Drama|Romance|Thriller
1880,1961,Rain Man (1988),Drama
1187,1210,Star Wars: Episode VI - Return of the Jedi (1983),Action|Adventure|Romance|Sci-Fi|War
1216,1242,Glory (1989),Action|Drama|War
1202,1225,Amadeus (1984),Drama
1273,1302,Field of Dreams (1989),Drama
1220,1246,Dead Poets Society (1989),Drama
1881,1962,Driving Miss Daisy (1989),Drama
1877,1957,Chariots of Fire (1981),Drama
1938,2020,Dangerous Liaisons (1988),Drama|Romance


## Evaluation

In [None]:
# Fixed seed so that both the fold assignment and SVD's random factor
# initialisation are repeatable from one run to the next.
SEED = 42

# Ratings are on a 1-5 scale; stated explicitly instead of relying on the default.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_data[['user_id', 'movie_id', 'rating']], reader)
model = SVD(random_state=SEED)

# 5-fold cross-validation reporting RMSE and MAE per fold; the returned dict
# of per-fold scores and timings is displayed as the cell output.
cross_validate(
    model,
    data,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, random_state=SEED, shuffle=True),
    verbose=True,
)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8733  0.8716  0.8737  0.8749  0.8748  0.8737  0.0012  
MAE (testset)     0.6847  0.6841  0.6857  0.6872  0.6873  0.6858  0.0013  
Fit time          47.64   48.24   48.75   48.73   48.73   48.42   0.43    
Test time         3.49    2.63    2.95    2.95    3.00    3.01    0.28    


{'fit_time': (47.63755226135254,
  48.23733425140381,
  48.747910499572754,
  48.73135757446289,
  48.727574825286865),
 'test_mae': array([0.68469391, 0.68414908, 0.6856898 , 0.68721239, 0.6873304 ]),
 'test_rmse': array([0.87329754, 0.87160043, 0.87370145, 0.87489552, 0.87482097]),
 'test_time': (3.4912829399108887,
  2.6339845657348633,
  2.9532926082611084,
  2.9537436962127686,
  2.996577024459839)}

## Training on the Full Dataset and Predicting

In [None]:
# Build a training set from every available rating (no hold-out) and refit
# the same SVD model on it.
training_set = data.build_full_trainset()
model.fit(training_set)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f0646d93d50>

In [None]:
# All observed ratings of user 1500.
ratings_data[ratings_data['user_id'] == 1500]

Unnamed: 0,user_id,movie_id,rating,timestamp,user_embeded_id,movie_embeded_id
248464,1500,3791,1,1038930397,1499,3790
248465,1500,3793,3,974749832,1499,3792
248466,1500,2054,3,1038930397,1499,2053
248467,1500,1257,3,988252269,1499,1256
248468,1500,2997,5,974748684,1499,2996
...,...,...,...,...,...,...
248574,1500,1223,4,988251891,1499,1222
248575,1500,1080,5,988252199,1499,1079
248576,1500,3911,5,1021963778,1499,3910
248577,1500,1230,5,988251911,1499,1229


In [None]:
# Estimated rating of user 1500 for movie 1240. No true rating is passed,
# so `r_ui` in the returned Prediction is None.
model.predict(1500, 1240)

Prediction(uid=1500, iid=1240, r_ui=None, est=4.118643758891733, details={'was_impossible': False})