In [1]:
from collections import defaultdict
import pandas as pd
import numpy as np
import pickle
import random

from surprise import SVD, NMF, KNNWithMeans, KNNBaseline
from surprise.model_selection import KFold, GridSearchCV
from surprise.model_selection import train_test_split
from surprise.model_selection import cross_validate
from surprise import Reader, Dataset
from surprise import accuracy

from sklearn import model_selection

In [2]:
# One seed for every RNG this notebook relies on.
SEED = 8888
random.seed(SEED)
# Surprise shuffles its cross-validation folds with NumPy's global RNG when no
# random_state is supplied, so seeding Python's `random` alone does not make
# the reported scores reproducible.
np.random.seed(SEED)

In [3]:
# Ratings sample; its 'user_idx', 'name' and 'rating' columns feed the
# Surprise Dataset built further down.
# pickle can run arbitrary code while loading: only open files you trust.
with open('df_sample.pickle', mode='rb') as ratings_file:
    df = pickle.load(ratings_file)

In [4]:
# Anime lookup saved alongside the ratings sample (not referenced by any other
# cell visible in this notebook; presumably anime metadata - TODO confirm).
# pickle can run arbitrary code while loading: only open files you trust.
with open('anime_sample_dict.pickle', mode='rb') as anime_dict_file:
    anime_dict = pickle.load(anime_dict_file)

In [5]:
# Per-user watchlist sample (not referenced by any other cell visible in this
# notebook; exact structure is not shown here - TODO confirm).
# pickle can run arbitrary code while loading: only open files you trust.
with open('sample_users_watchlist.pickle', mode='rb') as watchlist_file:
    users_watchlist = pickle.load(watchlist_file)

In [6]:
# Per-user ratings sample (not referenced by any other cell visible in this
# notebook; exact structure is not shown here - TODO confirm).
# pickle can run arbitrary code while loading: only open files you trust.
with open('sample_users_ratings.pickle', mode='rb') as ratings_by_user_file:
    users_ratings = pickle.load(ratings_by_user_file)

In [16]:
def similar_anime_recommendations(anime, algo, k=10):
    """Print the titles of the ``k`` nearest neighbours of ``anime``.

    Parameters
    ----------
    anime : raw item id
        Raw id of the item to look up; this notebook uses the anime title.
    algo : fitted neighbourhood model
        Object exposing ``trainset`` (with ``to_inner_iid`` / ``to_raw_iid``)
        and ``get_neighbors``; this notebook passes an item-based KNNBaseline.
    k : int, default 10
        Number of neighbours to request from ``algo.get_neighbors``.

    Returns
    -------
    None
        The header line and one neighbour per line are written to stdout.
    """
    trainset = algo.trainset

    # The model works on inner ids, so translate raw -> inner for the lookup
    # and inner -> raw for display.
    target_inner_id = trainset.to_inner_iid(anime)
    neighbor_inner_ids = algo.get_neighbors(target_inner_id, k=k)
    neighbor_titles = list(map(trainset.to_raw_iid, neighbor_inner_ids))

    print(f'If you liked {anime}, you may also like these animes:')
    for title in neighbor_titles:
        print(title)

In [8]:
# Wrap the ratings frame in a Surprise Dataset #
# Columns are passed in user, item, rating order; ratings use a 1-10 scale.
rating_columns = ['user_idx', 'name', 'rating']
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(df[rating_columns], reader)

In [10]:
# Item-based KNNWithMeans with cosine similarity, scored by 5-fold CV.
sim_options = {'name': 'cosine', 'user_based': False}
algo = KNNWithMeans(k=50, sim_options=sim_options)
# Seeded folds: re-running reproduces the scores, and every candidate model
# evaluated with the same KFold settings sees identical train/test splits.
cross_validate(algo, data, cv=KFold(n_splits=5, random_state=SEED), verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    2.1182  2.1122  2.1221  2.1297  2.1086  2.1181  0.0074  
MAE (testset)     1.3886  1.3877  1.3894  1.3978  1.3898  1.3907  0.0036  
Fit time          4.76    5.45    5.53    5.59    5.58    5.38    0.31    
Test time         25.69   28.05   27.59   27.79   31.73   28.17   1.96    


{'test_rmse': array([2.11818859, 2.11215869, 2.12207776, 2.12966304, 2.10855102]),
 'test_mae': array([1.3886222 , 1.38769418, 1.38944751, 1.39776558, 1.38979015]),
 'fit_time': (4.761839866638184,
  5.4464192390441895,
  5.534144878387451,
  5.589489221572876,
  5.582438230514526),
 'test_time': (25.69375467300415,
  28.051936388015747,
  27.58886694908142,
  27.789535760879517,
  31.725661277770996)}

In [11]:
# Item-based KNNWithMeans with Pearson similarity, scored by 5-fold CV.
sim_options = {'name': 'pearson', 'user_based': False}
algo = KNNWithMeans(k=50, sim_options=sim_options)
# Seeded folds: re-running reproduces the scores, and every candidate model
# evaluated with the same KFold settings sees identical train/test splits.
cross_validate(algo, data, cv=KFold(n_splits=5, random_state=SEED), verbose=True)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    2.0951  2.1075  2.0980  2.1030  2.1094  2.1026  0.0054  
MAE (testset)     1.3780  1.3847  1.3788  1.3827  1.3841  1.3817  0.0028  
Fit time          6.71    6.80    6.78    7.76    7.35    7.08    0.41    
Test time         25.23   25.02   27.64   26.09   26.02   26.00   0.92    


{'test_rmse': array([2.0951327 , 2.10752916, 2.09796734, 2.10302288, 2.109407  ]),
 'test_mae': array([1.37802985, 1.38469382, 1.37876926, 1.38272107, 1.38412463]),
 'fit_time': (6.714022159576416,
  6.803413152694702,
  6.7826128005981445,
  7.762157201766968,
  7.3487935066223145),
 'test_time': (25.22697138786316,
  25.021977186203003,
  27.635300874710083,
  26.094757318496704,
  26.022053718566895)}

In [12]:
# Item-based KNNWithMeans with mean-squared-difference similarity, scored by
# 5-fold CV.
sim_options = {'name': 'msd', 'user_based': False}
algo = KNNWithMeans(k=50, sim_options=sim_options)
# Seeded folds: re-running reproduces the scores, and every candidate model
# evaluated with the same KFold settings sees identical train/test splits.
cross_validate(algo, data, cv=KFold(n_splits=5, random_state=SEED), verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    2.1001  2.1020  2.1030  2.0805  2.0967  2.0965  0.0083  
MAE (testset)     1.3768  1.3745  1.3769  1.3646  1.3751  1.3736  0.0046  
Fit time          3.07    3.23    3.23    3.23    3.78    3.31    0.25    
Test time         26.41   25.76   26.06   27.89   30.70   27.36   1.82    


{'test_rmse': array([2.10010547, 2.10198112, 2.10301994, 2.08047988, 2.0966664 ]),
 'test_mae': array([1.37676224, 1.37453   , 1.37692645, 1.36456867, 1.37508826]),
 'fit_time': (3.065747022628784,
  3.228817939758301,
  3.230854034423828,
  3.2277026176452637,
  3.783285140991211),
 'test_time': (26.407129049301147,
  25.75839376449585,
  26.05500864982605,
  27.88563108444214,
  30.695327520370483)}

In [13]:
# Item-based KNNBaseline with pearson_baseline similarity, scored by 5-fold CV.
sim_options = {'name': 'pearson_baseline', 'user_based': False}
algo = KNNBaseline(k=50, sim_options=sim_options)
# Seeded folds: re-running reproduces the scores, and every candidate model
# evaluated with the same KFold settings sees identical train/test splits.
cross_validate(algo, data, cv=KFold(n_splits=5, random_state=SEED), verbose=True)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBaseline on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    2.0328  2.0166  2.0185  2.0192  2.0164  2.0207  0.0061  
MAE (testset)     1.3167  1.3079  1.3120  1.3081  1.3084  1.3106  0.0034  
Fit time          8.94    9.19    10.86   9.22    9.68    9.58    0.68    
Test time         28.42   28.18   32.06   32.22   27.84   29.75   1.97    

{'test_rmse': array([2.03276189, 2.01658891, 2.01845345, 2.01919979, 2.01641122]),
 'test_mae': array([1.31666992, 1.30788919, 1.3119662 , 1.30812899, 1.30844818]),
 'fit_time': (8.944912672042847,
  9.194278240203857,
  10.86074686050415,
  9.215858936309814,
  9.68273663520813),
 'test_time': (28.421315908432007,
  28.183903455734253,
  32.0648148059845,
  32.22036528587341,
  27.836366176605225)}

In [14]:
# Build a trainset from every rating in `data` (no hold-out); the final model
# below is fitted on it.
trainset = data.build_full_trainset()
# NOTE(review): `testset` is not used by any cell visible in this notebook.
# The anti-testset presumably contains every user/item pair that has no rating
# in the trainset, which can be very large - confirm it is actually needed.
testset = trainset.build_anti_testset()

In [15]:
# Final model: item-based KNNBaseline with pearson_baseline similarity (the
# configuration with the lowest cross-validated RMSE/MAE above), fitted on the
# full trainset.
sim_options = dict(name='pearson_baseline', user_based=False)
algo = KNNBaseline(sim_options=sim_options, k=50)
algo.fit(trainset)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBaseline at 0x7fd5971db490>

In [25]:
# Sanity-check the fitted model by listing the ten nearest neighbours of one title.
similar_anime_recommendations(anime='One Punch Man', algo=algo, k=10)

If you liked One Punch Man, you may also like these animes:
Sakurako-san no Ashimoto ni wa Shitai ga Umatteiru
Fairy Tail (2014)
Ore Monogatari!!
Kiznaiver
Haikyuu!! Second Season
Dimension W
Gate: Jieitai Kanochi nite, Kaku Tatakaeri 2nd Season
Rakudai Kishi no Cavalry
Owarimonogatari
Owari no Seraph: Nagoya Kessen-hen


In [26]:
# Persist the fitted recommender so it can be reloaded without refitting.
with open('KNNBaseline_recommender.pickle', mode='wb') as model_file:
    pickle.dump(algo, model_file)