In [2]:
import pandas as pd
from surprise import Reader, SVD, Dataset, accuracy
from surprise.model_selection import GridSearchCV, train_test_split, cross_validate
# Show every column when a frame is displayed (no column elision).
pd.set_option("display.max_columns", None)

# Raw inputs, read from the working directory: movie metadata and user ratings.
movie = pd.read_csv("movies.csv")
rating = pd.read_csv("ratings.csv")

# Left join keyed on movieId: every movie row is kept, so a movie that has no
# rating rows still appears, with NaN in the columns coming from `rating`.
df = movie.merge(rating, how="left", on="movieId")
df

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,3.0,4.0,9.449194e+08
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,6.0,5.0,8.582755e+08
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,8.0,4.0,8.339819e+08
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,10.0,4.0,9.434979e+08
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,11.0,4.5,1.230859e+09
...,...,...,...,...,...,...
20000792,131254,Kein Bund für's Leben (2007),Comedy,79570.0,4.0,1.427744e+09
20000793,131256,"Feuer, Eis & Dosenbier (2002)",Comedy,79570.0,4.0,1.427745e+09
20000794,131258,The Pirates (2014),Adventure,28906.0,2.5,1.427745e+09
20000795,131260,Rentun Ruusu (2001),(no genres listed),65409.0,3.0,1.427745e+09


In [3]:
# Four titles picked to keep the experiment small.
# `movie_ids` and `movies` are kept index-aligned: movies[i] is the title of
# movie_ids[i] (356 is Forrest Gump and 4422 is Cries and Whispers in the
# merged frame, so the titles must be listed in that order).
movie_ids = [130219, 356, 4422, 541]
movies = ["The Dark Knight (2011)",
          "Forrest Gump (1994)",
          "Cries and Whispers (Viskningar och rop) (1972)",
          "Blade Runner (1982)"]

# Keep only the rows of the merged frame that belong to the selected movies.
sample_df = df.loc[df["movieId"].isin(movie_ids)]
sample_df

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
2457839,356,Forrest Gump (1994),Comedy|Drama|Romance|War,4.0,4.0,8.408789e+08
2457840,356,Forrest Gump (1994),Comedy|Drama|Romance|War,7.0,4.0,1.011208e+09
2457841,356,Forrest Gump (1994),Comedy|Drama|Romance|War,8.0,5.0,8.339823e+08
2457842,356,Forrest Gump (1994),Comedy|Drama|Romance|War,9.0,4.0,9.940192e+08
2457843,356,Forrest Gump (1994),Comedy|Drama|Romance|War,10.0,3.0,9.434971e+08
...,...,...,...,...,...,...
14742596,4422,Cries and Whispers (Viskningar och rop) (1972),Drama,137665.0,5.0,1.015723e+09
14742597,4422,Cries and Whispers (Viskningar och rop) (1972),Drama,137851.0,4.5,1.346153e+09
14742598,4422,Cries and Whispers (Viskningar och rop) (1972),Drama,137904.0,3.5,1.169100e+09
14742599,4422,Cries and Whispers (Viskningar och rop) (1972),Drama,138325.0,5.0,1.282957e+09


In [7]:
# User x title matrix of ratings for the sampled movies; a user/title pair
# without a rating is shown as NaN.
user_movie_df = pd.pivot_table(
    sample_df,
    values="rating",
    index=["userId"],
    columns=["title"],
)
user_movie_df

title,Blade Runner (1982),Cries and Whispers (Viskningar och rop) (1972),Forrest Gump (1994),The Dark Knight (2011)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,4.0,,,
2.0,5.0,,,
3.0,5.0,,,
4.0,,,4.0,
7.0,,,4.0,
...,...,...,...,...
138474.0,,,5.0,
138483.0,4.0,,4.0,
138484.0,,,5.0,
138486.0,,,5.0,


In [8]:
# MovieLens ratings are given in half-star steps from 0.5 to 5.0, so the lower
# bound of the scale must be 0.5. Surprise clips every estimate into
# `rating_scale`; with (1, 5) no prediction could ever go below 1.
reader = Reader(rating_scale=(0.5, 5))

# The movie/rating frame comes from a left merge, so a movie without ratings
# would contribute a row with NaN userId/rating; drop such rows before loading.
# Column order (user, item, rating) is the order load_from_df expects.
rating_triplets = sample_df[["userId", "movieId", "rating"]].dropna()
data = Dataset.load_from_df(rating_triplets, reader)
data

<surprise.dataset.DatasetAutoFolds at 0x2316f30afb0>

In [9]:
# Hold out 25% of the ratings for evaluation. Both the split and the SVD
# factor initialisation are random, so pin the seed to keep the RMSE and the
# predictions in the following cells repeatable across kernel restarts.
SEED = 42
train, test = train_test_split(data, test_size=.25, random_state=SEED)
svd_model = SVD(random_state=SEED)
svd_model.fit(train)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2316f30aad0>

In [10]:
# Score every held-out (user, item, rating) triple with the fitted model.
predictions = svd_model.test(test)
# Preview only the first few entries: echoing the whole list writes one line
# per held-out rating into the cell output.
predictions[:10]

[Prediction(uid=19522.0, iid=356, r_ui=5.0, est=4.179863903518062, details={'was_impossible': False}),
 Prediction(uid=69808.0, iid=541, r_ui=4.0, est=3.9960420954555245, details={'was_impossible': False}),
 Prediction(uid=112988.0, iid=4422, r_ui=3.0, est=3.893174917945861, details={'was_impossible': False}),
 Prediction(uid=121763.0, iid=356, r_ui=4.0, est=4.179863903518062, details={'was_impossible': False}),
 Prediction(uid=78137.0, iid=541, r_ui=5.0, est=3.9929479726956694, details={'was_impossible': False}),
 Prediction(uid=84441.0, iid=541, r_ui=3.0, est=4.228196497479908, details={'was_impossible': False}),
 Prediction(uid=58419.0, iid=356, r_ui=3.5, est=4.179863903518062, details={'was_impossible': False}),
 Prediction(uid=21809.0, iid=541, r_ui=4.0, est=4.212088870292035, details={'was_impossible': False}),
 Prediction(uid=12947.0, iid=356, r_ui=4.5, est=4.179863903518062, details={'was_impossible': False}),
 Prediction(uid=84612.0, iid=541, r_ui=4.5, est=4.128512808139246, d

In [13]:
# RMSE of the held-out predictions; the call prints the score and also returns it.
accuracy.rmse(predictions)

RMSE: 0.9475


0.947515389876652

In [14]:
# Estimated rating of user 1 for item 541; verbose=True also prints the prediction.
svd_model.predict(uid=1.0,iid=541,verbose=True)

user: 1.0        item: 541        r_ui = None   est = 3.97   {'was_impossible': False}


Prediction(uid=1.0, iid=541, r_ui=None, est=3.973959397363752, details={'was_impossible': False})

In [17]:
# Estimated rating of user 5 for item 541.
svd_model.predict(uid=5.0,iid=541,verbose=True)

user: 5.0        item: 541        r_ui = None   est = 4.16   {'was_impossible': False}


Prediction(uid=5.0, iid=541, r_ui=None, est=4.1550269389828935, details={'was_impossible': False})

In [16]:
# Ratings that user 1 actually gave within the sampled movies.
sample_df[sample_df["userId"].eq(1)]

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
3612352,541,Blade Runner (1982),Action|Sci-Fi|Thriller,1.0,4.0,1112485000.0


In [27]:
# NOTE(review): repeats the user-5 / item-541 prediction already requested in an
# earlier cell; the execution counts show the cells were run out of order.
svd_model.predict(uid=5.0,iid=541,verbose=True)

user: 5.0        item: 541        r_ui = None   est = 4.16   {'was_impossible': False}


Prediction(uid=5.0, iid=541, r_ui=None, est=4.1550269389828935, details={'was_impossible': False})

In [25]:
# Re-display the sampled ratings frame.
sample_df

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
2457839,356,Forrest Gump (1994),Comedy|Drama|Romance|War,4.0,4.0,8.408789e+08
2457840,356,Forrest Gump (1994),Comedy|Drama|Romance|War,7.0,4.0,1.011208e+09
2457841,356,Forrest Gump (1994),Comedy|Drama|Romance|War,8.0,5.0,8.339823e+08
2457842,356,Forrest Gump (1994),Comedy|Drama|Romance|War,9.0,4.0,9.940192e+08
2457843,356,Forrest Gump (1994),Comedy|Drama|Romance|War,10.0,3.0,9.434971e+08
...,...,...,...,...,...,...
14742596,4422,Cries and Whispers (Viskningar och rop) (1972),Drama,137665.0,5.0,1.015723e+09
14742597,4422,Cries and Whispers (Viskningar och rop) (1972),Drama,137851.0,4.5,1.346153e+09
14742598,4422,Cries and Whispers (Viskningar och rop) (1972),Drama,137904.0,3.5,1.169100e+09
14742599,4422,Cries and Whispers (Viskningar och rop) (1972),Drama,138325.0,5.0,1.282957e+09


In [20]:
# Ratings that user 7 actually gave within the sampled movies.
sample_df[sample_df["userId"].eq(7)]

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
2457840,356,Forrest Gump (1994),Comedy|Drama|Romance|War,7.0,4.0,1011208000.0


In [33]:
# Candidate hyper-parameters: 4 epoch counts x 3 learning rates = 12 combinations.
param_grid = {
    "n_epochs": [10, 20, 30, 40],
    "lr_all": [0.002, 0.005, 0.007],
}

# Exhaustive search over the grid with 10-fold cross-validation, scored on
# RMSE and MAE, run in parallel (n_jobs=-1) with joblib progress messages on.
gs = GridSearchCV(
    SVD,
    param_grid,
    measures=["rmse", "mae"],
    cv=10,
    n_jobs=-1,
    joblib_verbose=True,
)
gs.fit(data)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    3.1s
[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:   25.5s finished


In [34]:
# Best parameter combination found by the search, reported separately per measure.
gs.best_params

{'rmse': {'n_epochs': 10, 'lr_all': 0.002},
 'mae': {'n_epochs': 10, 'lr_all': 0.005}}

In [35]:
# Best cross-validated score per measure.
gs.best_score

{'rmse': 0.9306576265135214, 'mae': 0.7149213650227233}

In [36]:
# Parameter combination that achieved the best RMSE.
gs.best_params["rmse"]

{'n_epochs': 10, 'lr_all': 0.002}

In [37]:
# Exploratory: list the attributes of a default-constructed SVD to see which hyper-parameters it exposes.
dir(SVD())

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slotnames__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'biased',
 'bsl_options',
 'compute_baselines',
 'compute_similarities',
 'default_prediction',
 'estimate',
 'fit',
 'get_neighbors',
 'init_mean',
 'init_std_dev',
 'lr_bi',
 'lr_bu',
 'lr_pu',
 'lr_qi',
 'n_epochs',
 'n_factors',
 'predict',
 'random_state',
 'reg_bi',
 'reg_bu',
 'reg_pu',
 'reg_qi',
 'sgd',
 'sim_options',
 'test',
 'verbose']

In [38]:
# Default number of epochs of SVD, for comparison with the tuned value.
SVD().n_epochs

20

In [43]:
# Default learning rate of one of the per-parameter rates (lr_qi) on a default SVD.
SVD().lr_qi

0.005

In [44]:
# Final model built from the RMSE-optimal hyper-parameters of the grid search.
# SVD initialises its factors randomly, so fix the seed to make the fitted
# model and its predictions repeatable.
svd_model = SVD(random_state=42, **gs.best_params["rmse"])

In [None]:
# (personal contact details removed — do not keep names or phone numbers in the notebook)

In [45]:
# Refit the tuned model on every available rating (no hold-out).
# The Trainset gets its own name: rebinding `data` would replace the Dataset
# with a Trainset, so re-running this cell (Trainset has no
# build_full_trainset) or the earlier split / grid-search cells would fail.
full_trainset = data.build_full_trainset()
svd_model.fit(full_trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x23104f4d300>

In [54]:
# Estimated rating of user 1 for item 541 with the refitted, tuned model.
svd_model.predict(uid=1.0,iid=541,verbose=True)

user: 1.0        item: 541        r_ui = None   est = 4.22   {'was_impossible': False}


Prediction(uid=1.0, iid=541, r_ui=None, est=4.21843253325537, details={'was_impossible': False})