In [1]:
import pandas as pd

from collections import Counter

from utils import *

from surprise import Dataset,Reader
from surprise import KNNBasic,accuracy,SVD
from surprise.model_selection import train_test_split

from sklearn.metrics import classification_report

In [3]:
# Load the anime catalogue and add `Genres_list`: the comma-separated `Genres`
# string of each row split into a Python list of genre names.
df_animes = pd.read_csv('./data/anime.csv').assign(
    Genres_list=lambda frame: frame['Genres'].map(lambda genres: genres.split(', '))
)

In [4]:
# Ratings file: keep only the first three columns and rename them to the
# (user, item, rating) names used throughout the notebook. `header=0` tells
# pandas that the file's own header row is being replaced by `names`.
df = pd.read_csv(
    './data/rating_complete.csv',
    usecols=range(3),
    header=0,
    names=('user', 'item', 'rating'),
    nrows=500000,  # limit rows to 500k for tests, otherwise things take a while
)

# Second ratings file: `header=1` takes the column names from the file's second
# line, so the first line is skipped.
df_reco = pd.read_csv('./data/reco_rates.csv', header=1)

In [5]:
# Combine the two rating frames into `df_full` using the `combine_df` helper
# (not defined in this notebook; presumably provided by `from utils import *`).
# NOTE(review): the SettingWithCopyWarning in this cell's output points at a
# `df_reco_comp['rating'] = ...` assignment, which looks like it lives inside
# `combine_df` — it has to be fixed there (e.g. with `.loc` or an explicit
# `.copy()`), not in this cell.
df_full = combine_df(df_reco, df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reco_comp['rating'] = df_reco_comp.rating.apply(lambda x: 1 if x== -1 else 2) #set the rating scale to 1 or 2 for our dataset


In [5]:
# Genre statistics for one user versus the whole catalogue.
# First join the ratings of user "maxime" with the anime metadata
# (ratings `item` == catalogue `MAL_ID`), keeping only matching rows.
maxime_animes = pd.merge(
    df_full.query('user=="maxime"'),
    df_animes,
    how='inner',
    left_on='item',
    right_on='MAL_ID',
)
genres_pers = compute_genres_stats(maxime_animes)
genres_full = compute_genres_stats(df_animes)

In [6]:
# Compare the personal genre statistics with the catalogue-wide ones.
# Left as the cell's last expression so the notebook displays the returned
# value (in the recorded output: a mapping of genre name -> float score).
genres_for_playlists(genres_pers, genres_full)

{'Adventure': 0.4982917663136317,
 'Drama': 0.35087119918004783,
 'Comedy': 0.3233686368295182,
 'Demons': 0.3048058307709828,
 'Mystery': 0.29193713700034163,
 'Seinen': 0.2860722013438105,
 'Action': 0.27861291424666895,
 'Romance': 0.2252021409862202,
 'Shounen': 0.21928026420681013,
 'Yaoi': 0.16427513950575104,
 'Samurai': 0.15516455984512015,
 'Fantasy': 0.146281744676005,
 'Martial Arts': 0.1424666894431158,
 'Shoujo': 0.12749117412595376,
 'Sports': 0.1260676460539802,
 'Ecchi': 0.12299282541851725,
 'Magic': 0.10511331283452909,
 'Historical': 0.10152602209315567,
 'Hentai': 0.08991003302585127,
 'Sci-Fi': 0.01958774627035645}

In [31]:
# Balance the two rating classes: keep the same number of rows for rating 1
# and rating 2 by truncating the larger group to the size of the smaller one.
# NOTE(review): truncation keeps the *first* rows of the larger class, so the
# result depends on the row order of df_full — confirm this is intended rather
# than a random sample.
df_full_temp_1 = df_full.query('rating==1')
df_full_temp_2 = df_full.query('rating==2')

n_per_class = min(len(df_full_temp_1), len(df_full_temp_2))
df_full_temp_1 = df_full_temp_1.head(n_per_class)
df_full_temp_2 = df_full_temp_2.head(n_per_class)

df_full = pd.concat([df_full_temp_1, df_full_temp_2])

In [33]:
# Ratings in df_full take the values 1 or 2, which is the scale Surprise is told about.
reader = Reader(rating_scale=(1, 2))

# Surprise's load_from_df requires exactly three columns in the order
# (user ids, item ids, ratings). Select them explicitly so the load does not
# silently depend on the column order produced upstream.
data = Dataset.load_from_df(df_full[['user', 'item', 'rating']], reader)

# Split dataset into 80% train and 20% test; the fixed seed keeps the split
# reproducible across runs.
trainset, testset = train_test_split(data, test_size=0.2, random_state=7)

### Using KNN

In [34]:
# User-based KNN with cosine similarity.
sim_options = {
    "user_based": True,  # compute similarities between users
    "name": "cosine",
}

# k=40 is Surprise's default neighbourhood size; min_k=5 is the minimum number
# of neighbours used for aggregation.
algo = KNNBasic(sim_options=sim_options, k=40, min_k=5, verbose=False)
algo.fit(trainset)
predictions = algo.test(testset)

# accuracy.rmse / accuracy.mae print the metric themselves (verbose defaults to
# True) and return it; wrapping them in print() showed every metric twice.
accuracy.rmse(predictions)
accuracy.mae(predictions)

# Turn each estimated rating into a class (2 if above the 1.5 midpoint, else 1)
# and record whether it matches the true rating.
res = [(2 if pred.est > 1.5 else 1) == pred.r_ui for pred in predictions]

# Keep the Counter itself: a missing key counts as 0, so an all-correct or
# all-wrong run no longer raises KeyError as the plain dict did.
dict_res = Counter(res)
print('Accuracy:', dict_res[True] / (dict_res[True] + dict_res[False]))

RMSE: 0.4782
0.4782073742090177
MAE:  0.4260
0.42597345140959403
Accuracy: 0.6644461008105842


In [35]:
# Per-class precision/recall/F1 for the current `predictions`: true ratings are
# cast to int, estimates are thresholded at 1.5 into class 2 (above) or 1.
print(
    classification_report(
        y_true=[int(p.r_ui) for p in predictions],
        y_pred=[(2 if p.est > 1.5 else 1) for p in predictions],
    )
)

              precision    recall  f1-score   support

           1       0.66      0.67      0.67     10706
           2       0.67      0.66      0.66     10760

    accuracy                           0.66     21466
   macro avg       0.66      0.66      0.66     21466
weighted avg       0.66      0.66      0.66     21466



### Using SVD

In [36]:
# Matrix-factorisation model (SVD) trained on the same split as the KNN model.
# random_state pins the random initialisation of the factors so the reported
# metrics are reproducible, matching the seed already used for the split.
algo = SVD(n_epochs=30, lr_all=0.005, reg_all=0.4, random_state=7)
algo.fit(trainset)
predictions = algo.test(testset)

# accuracy.rmse / accuracy.mae print the metric themselves (verbose defaults to
# True) and return it; wrapping them in print() showed every metric twice.
accuracy.rmse(predictions)
accuracy.mae(predictions)

RMSE: 0.2533
0.2532980805915427
MAE:  0.2061
0.2060592683007716


In [37]:
# Classification metrics for the current `predictions`, using the same 1.5
# cut-off to map each estimated rating onto class 1 or class 2.
true_labels = [int(prediction.r_ui) for prediction in predictions]
predicted_labels = [2 if prediction.est > 1.5 else 1 for prediction in predictions]
print(classification_report(true_labels, predicted_labels))

              precision    recall  f1-score   support

           1       0.99      0.89      0.94     10706
           2       0.90      0.99      0.94     10760

    accuracy                           0.94     21466
   macro avg       0.95      0.94      0.94     21466
weighted avg       0.95      0.94      0.94     21466



In [108]:
# Ask the helper for recommendations for user 'maxime' restricted to the
# 'Military' genre. `algo` is whichever model was fitted last (the SVD model
# when cells are run top to bottom) and `df_full` is the class-balanced frame.
# `get_recommendations_v2` is not defined in this notebook (presumably from
# `utils`); in the recorded output it returns anime rows with an `est_rating`
# column — NOTE(review): the ordering appears to be by descending est_rating,
# confirm in the helper.
get_recommendations_v2('maxime', algo, df_full, df_animes, 'Military')

Unnamed: 0,MAL_ID,est_rating,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,...,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1,Genres_list
9214,17717,1.693649,Mobile Suit Gundam: The 08th MS Team - A Battl...,7.1,"Action, Mecha, Military",Unknown,機動戦士ガンダム 第08MS小隊 三次元との戦い,Special,1,"Feb 22, 2013",...,341.0,913.0,1322.0,648.0,252.0,63.0,27.0,13.0,4.0,"[Action, Mecha, Military]"
1798,28907,1.638186,"Gate: Jieitai Kanochi nite, Kaku Tatakaeri",7.74,"Action, Adventure, Fantasy, Military",GATE,GATE（ゲート）自衛隊　彼の地にて、斯く戦えり,TV,12,"Jul 4, 2015 to Sep 19, 2015",...,64199.0,112250.0,80259.0,29438.0,12892.0,5895.0,2585.0,1405.0,1031.0,"[Action, Adventure, Fantasy, Military]"
8333,1095,1.627289,Kidou Keisatsu Patlabor the Movie,7.55,"Drama, Mecha, Military, Police",Patlabor:The Movie,機動警察パトレイバー the Movie,Movie,1,"Jul 15, 1989",...,2169.0,4879.0,4343.0,1535.0,578.0,149.0,70.0,21.0,26.0,"[Drama, Mecha, Military, Police]"
4536,6857,1.622406,Momotarou no Umiwashi,5.0,"Military, Historical, Kids",Momotaro's Sea Eagles,桃太郎の海鷲,Movie,1,"Mar 25, 1943",...,7.0,23.0,54.0,119.0,157.0,88.0,78.0,54.0,28.0,"[Military, Historical, Kids]"
1125,27631,1.613866,God Eater,7.3,"Action, Fantasy, Military, Sci-Fi",God Eater,GOD EATER,TV,13,"Jul 12, 2015 to Mar 26, 2016",...,24915.0,50416.0,55417.0,28570.0,14157.0,6929.0,3031.0,1472.0,1001.0,"[Action, Fantasy, Military, Sci-Fi]"
4045,2581,1.609334,Mobile Suit Gundam 00,8.14,"Action, Military, Sci-Fi, Space, Drama, Mecha",Mobile Suit Gundam 00,機動戦士ガンダム00,TV,25,"Oct 6, 2007 to Mar 29, 2008",...,26604.0,33240.0,18762.0,5996.0,2517.0,960.0,306.0,178.0,182.0,"[Action, Military, Sci-Fi, Space, Drama, Mecha]"
2811,20709,1.601353,Sabage-bu!,7.38,"Action, Military, Comedy, Shoujo",Sabagebu! -Survival Game Club!-,さばげぶっ!,TV,12,"Jul 6, 2014 to Sep 21, 2014",...,6011.0,13629.0,14148.0,6789.0,2999.0,1011.0,371.0,171.0,124.0,"[Action, Military, Comedy, Shoujo]"
1828,5114,1.596368,Fullmetal Alchemist: Brotherhood,9.19,"Action, Military, Adventure, Comedy, Drama, Ma...",Fullmetal Alchemist:Brotherhood,鋼の錬金術師 FULLMETAL ALCHEMIST,TV,64,"Apr 5, 2009 to Jul 4, 2010",...,401507.0,199160.0,70045.0,20210.0,9308.0,3222.0,1536.0,2162.0,16806.0,"[Action, Military, Adventure, Comedy, Drama, M..."
7795,2158,1.589836,Terra e... (TV),7.92,"Action, Drama, Military, Sci-Fi, Space",Unknown,地球へ…,TV,24,"Apr 7, 2007 to Sep 22, 2007",...,3613.0,4791.0,3404.0,1497.0,692.0,319.0,115.0,62.0,61.0,"[Action, Drama, Military, Sci-Fi, Space]"
72,6280,1.585758,Cat Shit One,7.01,"Action, Fantasy, Military",Unknown,キャットシットワン,ONA,1,"Jul 17, 2010",...,1794.0,3254.0,3939.0,2462.0,1370.0,669.0,332.0,208.0,203.0,"[Action, Fantasy, Military]"
