In [1]:
from collections import defaultdict
import pandas as pd
import numpy as np
import pickle
import random

from surprise import SVD, NMF, KNNWithMeans, KNNBaseline
from surprise.model_selection import KFold, GridSearchCV
from surprise.model_selection import train_test_split
from surprise.model_selection import cross_validate
from surprise import Reader, Dataset
from surprise import accuracy

from sklearn import model_selection

In [2]:
# Seed every random number generator the notebook relies on.
# Surprise falls back to NumPy's global generator whenever an algorithm or
# splitter is created without an explicit `random_state`, so seeding only the
# stdlib `random` module does not make the results reproducible.
SEED = 8888
random.seed(SEED)
np.random.seed(SEED)

In [3]:
# Ratings table; later cells read its 'user_idx', 'anime_idx' and 'rating' columns.
# NOTE: pickle can execute arbitrary code on load, so only open trusted files.
with open('df_sample.pickle', mode='rb') as ratings_file:
    df = pickle.load(ratings_file)

In [4]:
# Lookup that is indexed by anime id further down to obtain the displayed titles.
with open('anime_sample_dict.pickle', mode='rb') as anime_file:
    anime_dict = pickle.load(anime_file)

In [5]:
# Per-user watchlist sample (not referenced by the cells visible in this notebook section).
with open('sample_users_watchlist.pickle', mode='rb') as watchlist_file:
    users_watchlist = pickle.load(watchlist_file)

In [6]:
# Container that get_top_n_recommendations probes with (user_id, anime_id) pairs
# to skip animes a user has already rated.
with open('sample_users_ratings.pickle', mode='rb') as ratings_lookup_file:
    users_ratings = pickle.load(ratings_lookup_file)

In [7]:
def get_top_n_recommendations(predictions, n=10, exclude=None):
    """Return the ``n`` highest-estimated animes for every user.

    Parameters
    ----------
    predictions : iterable of 5-tuples
        ``(user_id, anime_id, true_rating, est_rating, details)`` entries, as
        returned by ``algo.test``. Only the two ids and the estimated rating
        are used.
    n : int, default 10
        Maximum number of recommendations kept per user.
    exclude : container of ``(user_id, anime_id)`` pairs, optional
        Pairs that must not be recommended. When omitted, the notebook-level
        ``users_ratings`` object is used, which keeps existing calls working
        while letting callers (and tests) pass the exclusion set explicitly
        instead of relying on hidden kernel state.

    Returns
    -------
    collections.defaultdict
        Maps each user id to a list of ``(anime_id, est_rating)`` tuples sorted
        by estimated rating, highest first. Ties keep the order in which they
        appeared in ``predictions``. Users whose predictions were all excluded
        do not appear in the result.
    """
    if exclude is None:
        # Backward-compatible default: fall back to the notebook global.
        exclude = users_ratings

    top_n = defaultdict(list)

    # Mapping the predictions to each user #
    for user_id, anime_id, _true_rating, est_rating, _details in predictions:
        if (user_id, anime_id) in exclude:
            continue
        top_n[user_id].append((anime_id, est_rating))

    # Sorting the predictions for each user and returning the highest n animes #
    # (only existing keys are reassigned, so the dict size does not change
    # while it is being iterated)
    for user_id, candidates in top_n.items():
        candidates.sort(key=lambda pair: pair[1], reverse=True)
        top_n[user_id] = candidates[:n]

    return top_n

In [11]:
# Creating a Surprise Dataset object #
reader = Reader(rating_scale=(1,10))
data = Dataset.load_from_df(df[['user_idx', 'anime_idx', 'rating']], reader)

In [12]:
# Use every rating in `data` for training (no hold-out split) #
trainset = data.build_full_trainset()
# Candidate pairs to score, derived from the trainset via Surprise's "anti-testset" #
# (per the Surprise docs: all user/item pairs that have no rating in the trainset) #
testset = trainset.build_anti_testset()

In [13]:
# Fit SVD on the full trainset, then estimate a rating for every pair in `testset`.
# `random_state` pins the random initialisation of the factors so that a
# Restart & Run All yields the same predictions (SEED comes from the config cell).
algo = SVD(n_factors=50, n_epochs=20, random_state=SEED, verbose=True)
algo.fit(trainset)
predictions = algo.test(testset)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19


In [14]:
# Keep the ten best-scored animes per user from the SVD predictions.
SVD_recommendations = get_top_n_recommendations(predictions=predictions, n=10)

In [15]:
# Displays the whole user -> [(anime_id, est_rating), ...] mapping.
# NOTE(review): this prints every user; consider showing only a few entries.
SVD_recommendations

defaultdict(list,
            {0: [(443, 10),
              (752, 9.905052691160426),
              (98, 9.881713757974582),
              (127, 9.863771613820681),
              (944, 9.687604137815459),
              (799, 9.512984575954038),
              (18, 9.47113753815339),
              (522, 9.417359865209916),
              (997, 9.389320188997804),
              (822, 9.36991129132752)],
             1: [(3, 10),
              (700, 10),
              (522, 10),
              (287, 10),
              (98, 10),
              (416, 10),
              (520, 10),
              (536, 10),
              (190, 10),
              (670, 10)],
             9: [(520, 9.412629756348224),
              (518, 9.362153711958227),
              (477, 9.308522215595664),
              (767, 9.303133144154344),
              (665, 9.302762518663458),
              (28, 9.29498388260848),
              (26, 9.29492044892344),
              (0, 9.280464575929745),
              (333, 9.2714610

In [16]:
# Translate each recommended anime id into its anime_dict entry, per user;
# the estimated rating (second tuple element) is dropped.
user_recommendations = {
    user_id: [anime_dict[pair[0]] for pair in top_items]
    for user_id, top_items in SVD_recommendations.items()
}

In [18]:
# Persist the per-user recommendation lists built from anime_dict.
with open('user_recommendations.pickle', mode='wb') as out_file:
    pickle.dump(user_recommendations, out_file)

In [19]:
# Displays the full user -> recommended-entries mapping built above.
# NOTE(review): this prints every user; consider showing only a few entries.
user_recommendations

{0: ['Vampire Knight Guilty',
  'Vampire Knight',
  'Fruits Basket',
  'Blood+',
  'Lucky☆Star',
  'Death Note',
  'Monster',
  'Code Geass: Hangyaku no Lelouch R2',
  'Natsume Yuujinchou',
  'Code Geass: Hangyaku no Lelouch'],
 1: ['Trigun',
  'Saint Seiya',
  'Code Geass: Hangyaku no Lelouch R2',
  'Mirai Nikki (TV)',
  'Fruits Basket',
  'Hotaru no Haka',
  'Ouran Koukou Host Club',
  'Nana',
  'Elfen Lied',
  'Suzumiya Haruhi no Shoushitsu'],
 9: ['Ouran Koukou Host Club',
  'Suzumiya Haruhi no Yuuutsu',
  'Shigatsu wa Kimi no Uso',
  'Mushishi Zoku Shou',
  'Hajime no Ippo: New Challenger',
  'Neon Genesis Evangelion: The End of Evangelion',
  'Neon Genesis Evangelion',
  'Cowboy Bebop',
  'Mushishi',
  'Hotaru no Haka'],
 12: ['Hunter x Hunter (2011)',
  'Mononoke',
  'Gintama&#039;',
  'Slam Dunk',
  'Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare',
  'Kimi no Na wa.',
  'Hajime no Ippo: Rising',
  'Usagi Drop',
  'Gintama&#039;: Enchousen',
  'Tonari no Totoro'],
 16: ['Ha

In [14]:
# Persist the raw (anime_id, est_rating) recommendations per user.
# NOTE(review): the file name is misspelled ("recommnedations") and, unlike
# 'user_recommendations.pickle', has no extension. Confirm that nothing reads
# this exact path before renaming it.
with open('SVD_recommnedations', 'wb') as f:
    pickle.dump(SVD_recommendations, f)

In [14]:
# 5-fold cross-validation of NMF on the same ratings data.
# Both the model initialisation and the fold shuffling are seeded so the
# reported RMSE/MAE can be reproduced. `cv=KFold(n_splits=5, ...)` is what
# `cv=5` builds internally, with the seed made explicit.
algo = NMF(n_factors=15, n_epochs=30, random_state=SEED, verbose=True)
cross_validate(
    algo,
    data,
    measures=['rmse', 'mae'],
    cv=KFold(n_splits=5, random_state=SEED),
    n_jobs=-1,
)

{'test_rmse': array([3.03924586, 3.04947771, 3.05584938, 3.03110599, 3.04108803]),
 'test_mae': array([2.70536714, 2.71630197, 2.72033587, 2.69246892, 2.70436274]),
 'fit_time': (21.148293256759644,
  25.084901809692383,
  22.545557737350464,
  20.44687819480896,
  17.04156517982483),
 'test_time': (2.0794942378997803,
  1.248582124710083,
  1.246950387954712,
  1.079233169555664,
  0.850212812423706)}

In [None]:
# Re-train SVD and score the unseen (user, anime) pairs.
# Fixes three defects in the original cell: `SVsD` was a typo (NameError);
# `fit` expects a Trainset rather than the Dataset object; and `fit` returns the
# fitted algorithm, so predictions must come from `test`.
algo = SVD(n_factors=50, n_epochs=20, random_state=SEED, verbose=True)
algo.fit(trainset)
predictions = algo.test(testset)