In [1]:
import pandas as pd

from collections import Counter

from utils import *

from surprise import Dataset,Reader
from surprise import KNNBasic,accuracy,SVD
from surprise.model_selection import train_test_split

from sklearn.metrics import classification_report

In [2]:
# Load the anime catalogue and add a parsed genre column: the raw `Genres`
# string is split on ', ' so that `Genres_list` holds one Python list per title.
df_animes = pd.read_csv('./data/anime.csv')
df_animes['Genres_list'] = [genres.split(', ') for genres in df_animes['Genres']]

In [18]:
# Ratings: keep only the first three columns and rename them to the
# (user, item, rating) triplet. Only the first 500k rows are read, for tests,
# because otherwise things take a while.
df = pd.read_csv(
    './data/rating_complete.csv',
    header=0,
    usecols=[0, 1, 2],
    names=['user', 'item', 'rating'],
    nrows=500000,
)
# Recommendation-ratings file: column names are taken from the second line of
# the file (header=1) and at most 10 data rows are read.
df_reco = pd.read_csv('./data/reco_rates.csv', nrows=10, header=1)

In [20]:
# combine_df is not defined in this notebook; presumably it comes from the
# `from utils import *` star import and merges df_reco with the public ratings
# in df — TODO confirm against utils.
# NOTE(review): the SettingWithCopyWarning in this cell's output is raised
# inside combine_df, on the line that maps rating -1 -> 1 and anything else -> 2;
# silencing it requires an explicit .copy() (or .loc assignment) in utils.
df_full = combine_df(df_reco, df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reco_comp['rating'] = df_reco_comp.rating.apply(lambda x: 1 if x== -1 else 2) #set the rating scale to 1 or 2 for our dataset


In [21]:
# Genre statistics for the titles rated by user "maxime" versus the whole
# catalogue. The user's ratings are joined to the catalogue on the anime id
# (item <-> MAL_ID) so that genre information is available for each rating.
df_user_animes = pd.merge(
    df_full.query('user=="maxime"'),
    df_animes,
    how='inner',
    left_on='item',
    right_on='MAL_ID',
)
genres_pers = compute_genres_stats(df_user_animes)
genres_full = compute_genres_stats(df_animes)

In [22]:
# Display a genre -> float score mapping built from the user's genre statistics
# and the catalogue-wide ones; the displayed result is in descending score order.
# NOTE(review): the scoring formula is not visible here (presumably in utils) —
# confirm what the scores mean before interpreting them.
genres_for_playlists(genres_pers, genres_full)

{'Demons': 0.9714724974376495,
 'Adventure': 0.8316250996469651,
 'Samurai': 0.4884978931784535,
 'Martial Arts': 0.47580002277644917,
 'Shoujo': 0.4608245074592871,
 'Mystery': 0.4586038036670083,
 'Magic': 0.43844664616786244,
 'Historical': 0.434859355426489,
 'Romance': 0.3918688076528869,
 'Shounen': 0.3859469308734768,
 'Drama': 0.35087119918004783,
 'Fantasy': 0.3129484113426717,
 'Action': 0.27861291424666895,
 'Comedy': 0.1567019701628516}

In [23]:
# Balance the two rating classes: the larger class is truncated to the size of
# the smaller one. Rows are kept in their existing order (first rows win, no
# shuffling), then both classes are stacked back into df_full.
df_full_temp_1 = df_full[df_full['rating'] == 1]
df_full_temp_2 = df_full[df_full['rating'] == 2]

n_per_class = min(len(df_full_temp_1), len(df_full_temp_2))
df_full_temp_1 = df_full_temp_1.iloc[:n_per_class, :]
df_full_temp_2 = df_full_temp_2.iloc[:n_per_class, :]

df_full = pd.concat([df_full_temp_1, df_full_temp_2])

In [24]:
# The ratings in df_full are on a two-level scale (1 or 2); Surprise needs the
# scale bounds to parse the frame.
reader = Reader(rating_scale=(1, 2))
data = Dataset.load_from_df(df=df_full, reader=reader)

# Hold out 20% of the ratings for testing and train on the remaining 80%.
# The seed is fixed so the split is the same on every run.
trainset, testset = train_test_split(data, random_state=7, test_size=0.2)

### Using KNN

In [34]:
# User-based KNN with cosine similarity
sim_options = {
    "name": "cosine",
    "user_based": True,  # compute similarities between users (not items)
}

# k=40 is also Surprise's default neighbourhood size; min_k=5 is the minimum
# number of neighbours a prediction must aggregate over.
algo = KNNBasic(sim_options=sim_options, k=40, min_k=5, verbose=False)
algo.fit(trainset)
predictions = algo.test(testset)
# accuracy.rmse / accuracy.mae print a rounded summary line themselves and
# return the raw float, which print() then shows at full precision.
print(accuracy.rmse(predictions))
print(accuracy.mae(predictions))

# Binarise each estimate at the midpoint of the 1..2 scale and compare it with
# the true rating.
res = [(2 if pred.est > 1.5 else 1) == pred.r_ui for pred in predictions]
# Looking up True/False in a Counter-derived dict raised KeyError whenever every
# prediction was right (or every one wrong); the mean of the booleans is the
# same ratio without that failure mode. An empty test set yields NaN.
print('Accuracy:', sum(res) / len(res) if res else float('nan'))

RMSE: 0.4782
0.4782073742090177
MAE:  0.4260
0.42597345140959403
Accuracy: 0.6644461008105842


In [35]:
# Per-class precision/recall: true ratings vs. estimates binarised at 1.5
# (estimate > 1.5 -> class 2, otherwise class 1).
y_true = [int(pred.r_ui) for pred in predictions]
y_pred = [2 if pred.est > 1.5 else 1 for pred in predictions]
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           1       0.66      0.67      0.67     10706
           2       0.67      0.66      0.66     10760

    accuracy                           0.66     21466
   macro avg       0.66      0.66      0.66     21466
weighted avg       0.66      0.66      0.66     21466



### Using SVD

In [25]:
# Matrix-factorisation model (SVD).
# random_state pins the random initialisation of the factors so the metrics
# below are reproducible across kernel restarts; 7 mirrors the seed already
# used for the train/test split.
algo = SVD(n_epochs=30, lr_all=0.005, reg_all=0.4, random_state=7)
algo.fit(trainset)
predictions = algo.test(testset)
# accuracy.rmse / accuracy.mae print a rounded summary line themselves and
# return the raw float, which print() then shows at full precision.
print(accuracy.rmse(predictions))
print(accuracy.mae(predictions))

RMSE: 0.2527
0.2527433757227176
MAE:  0.2058
0.20575350370946524


In [26]:
# Per-class precision/recall for the current `predictions`: true ratings vs.
# estimates binarised at 1.5 (estimate > 1.5 -> class 2, otherwise class 1).
true_labels = [int(pred.r_ui) for pred in predictions]
predicted_labels = [2 if pred.est > 1.5 else 1 for pred in predictions]
print(classification_report(true_labels, predicted_labels))

              precision    recall  f1-score   support

           1       0.99      0.89      0.94     10704
           2       0.90      0.99      0.95     10758

    accuracy                           0.94     21462
   macro avg       0.95      0.94      0.94     21462
weighted avg       0.95      0.94      0.94     21462



In [29]:
# Recommendations for user 'maxime' for the 'Adventure' genre, scored with
# whichever model `algo` currently holds (the last one fitted in the kernel).
# The displayed frame is ordered by est_rating, highest first, and joins the
# catalogue columns from df_animes.
# NOTE(review): get_recommendations_v2 is not defined in this notebook
# (presumably from utils); whether it excludes titles the user already rated
# cannot be told from here — confirm.
get_recommendations_v2('maxime', algo, df_full, df_animes, 'Adventure')

Unnamed: 0,MAL_ID,est_rating,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,...,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1,Genres_list
125,20507,1.609984,Noragami,8.01,"Action, Adventure, Comedy, Supernatural, Shounen",Noragami:Stray God,ノラガミ,TV,12,"Jan 5, 2014 to Mar 23, 2014",...,203285.0,311913.0,199912.0,60200.0,21152.0,5854.0,1878.0,858.0,905.0,"[Action, Adventure, Comedy, Supernatural, Shou..."
2718,1121,1.55443,Pokemon Movie 06: Nanayo no Negaiboshi Jirachi,6.76,"Adventure, Comedy, Kids, Drama, Fantasy",Pokemon:Jirachi Wishmaker,ポケットモンスター・アドバンスジェネレーション 七夜の願い星 ジラーチ,Movie,1,"Jul 19, 2003",...,3433.0,7633.0,13947.0,10583.0,5562.0,1884.0,727.0,312.0,173.0,"[Adventure, Comedy, Kids, Drama, Fantasy]"
3402,2249,1.515441,Amon Saga,5.49,"Action, Adventure, Fantasy",Unknown,アモン・サーガ,OVA,1,"Jul 19, 1986",...,44.0,112.0,340.0,455.0,456.0,276.0,133.0,75.0,37.0,"[Action, Adventure, Fantasy]"
4327,4391,1.514818,Saru Masamune,5.46,"Adventure, Military",The Monkey Sword Masamune,猿正宗,Movie,1,"Jan, 1931",...,12.0,34.0,103.0,186.0,185.0,75.0,38.0,26.0,32.0,"[Adventure, Military]"
5326,2513,1.506919,Detective Conan OVA 03: Conan and Heiji and th...,7.2,"Adventure, Mystery, Comedy, Police, Shounen",Unknown,名探偵コナン:コナンと平次と消えた少年,OVA,1,2003,...,599.0,1347.0,2168.0,1147.0,514.0,147.0,42.0,14.0,22.0,"[Adventure, Mystery, Comedy, Police, Shounen]"
1380,1840,1.502475,Zero no Tsukaima: Futatsuki no Kishi,7.49,"Action, Adventure, Harem, Comedy, Magic, Roman...",The Familiar of Zero:Knight of the Twin Moons,ゼロの使い魔 ～双月の騎士～,TV,12,"Jul 9, 2007 to Sep 24, 2007",...,37977.0,66941.0,68906.0,32268.0,14717.0,6043.0,2594.0,1286.0,918.0,"[Action, Adventure, Harem, Comedy, Magic, Roma..."
2608,665,1.500435,JoJo no Kimyou na Bouken: Adventure,7.18,"Adventure, Drama, Fantasy, Horror, Shounen, Su...",JoJo's Bizarre Adventure,ジョジョの奇妙な冒険 ADVENTURE,OVA,7,"May 25, 2000 to Jan 25, 2002",...,2601.0,4933.0,6877.0,4068.0,1913.0,696.0,282.0,108.0,182.0,"[Adventure, Drama, Fantasy, Horror, Shounen, S..."
762,11241,1.498196,Brave 10,6.8,"Action, Adventure, Historical, Super Power, Sa...",Brave 10,ブレイブ・テン,TV,12,"Jan 8, 2012 to Mar 25, 2012",...,5179.0,11428.0,18986.0,12938.0,7300.0,2756.0,1173.0,511.0,301.0,"[Action, Adventure, Historical, Super Power, S..."
8432,1255,1.496109,Saint Seiya: Shinku no Shounen Densetsu,7.23,"Adventure, Fantasy, Sci-Fi, Shounen",Unknown,聖闘士星矢 真紅の少年伝説,Movie,1,"Jul 23, 1988",...,861.0,1523.0,2220.0,1139.0,570.0,220.0,94.0,33.0,24.0,"[Adventure, Fantasy, Sci-Fi, Shounen]"
2364,1669,1.490924,Bakuten Shoot Beyblade 2002,6.68,"Action, Adventure, Comedy, Sci-Fi, Shounen, Sp...",Beyblade V-Force,爆転シュート　ベイブレード2002,TV,51,"Jan 7, 2002 to Dec 30, 2002",...,2361.0,5144.0,9444.0,7564.0,4302.0,1465.0,651.0,327.0,158.0,"[Action, Adventure, Comedy, Sci-Fi, Shounen, S..."
