In [1]:
import pandas as pd

from collections import Counter

from utils import *

from surprise import Dataset,Reader
from surprise import KNNBasic,accuracy
from surprise.model_selection import train_test_split

from sklearn.metrics import classification_report

In [2]:
# Load the anime catalogue and add `Genres_list`: the comma-separated `Genres`
# string of each row split into a Python list (separator is ', ').
df_animes = (
    pd.read_csv('./data/anime.csv')
    .assign(Genres_list=lambda animes: animes['Genres'].map(lambda raw_genres: raw_genres.split(', ')))
)

In [3]:
# Public ratings: keep the first three columns, renamed to user / item / rating.
# Only the first 500k rows are read so that tests stay reasonably fast.
df = pd.read_csv(
    './data/rating_complete.csv',
    usecols=range(3),
    header=0,
    names=('user', 'item', 'rating'),
    nrows=500000,
)

# Personal recommendation ratings.
# NOTE(review): header=1 takes the *second* line of the file as the column names
# (the first line is skipped) - confirm this matches the layout of reco_rates.csv.
df_reco = pd.read_csv('./data/reco_rates.csv', header=1)

In [4]:
# Combine the personal ratings (df_reco) with the public ratings (df) into one frame.
# `combine_df` is not defined in this notebook (presumably it comes from `from utils import *`).
# NOTE(review): the stored output of this cell shows a pandas SettingWithCopyWarning raised
# inside the helper while it assigns the `rating` column - worth fixing there with .loc / .copy().
df_full = combine_df(df_reco, df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reco_comp['rating'] = df_reco_comp.rating.apply(lambda x: 1 if x== -1 else 2) #set the rating scale to 1 or 2 for our dataset


In [5]:
# Genre statistics restricted to the animes rated by user "maxime"
# (ratings joined to the catalogue on item == MAL_ID) ...
genres_pers = compute_genres_stats(
    df_full.query('user=="maxime"').merge(df_animes, how='inner', left_on='item', right_on='MAL_ID')
)
# ... and the same statistics over the whole catalogue, used as the baseline.
genres_full = compute_genres_stats(df_animes)

In [6]:
# Compare the personal genre statistics with the catalogue-wide ones.
# NOTE(review): judging by the stored output, the helper returns a genre -> score dict
# listed in descending score order; the scoring itself lives in the helper - confirm there.
genres_for_playlists(genres_pers, genres_full)

{'Adventure': 0.4982917663136317,
 'Drama': 0.35087119918004783,
 'Comedy': 0.3233686368295182,
 'Demons': 0.3048058307709828,
 'Mystery': 0.29193713700034163,
 'Seinen': 0.2860722013438105,
 'Action': 0.27861291424666895,
 'Romance': 0.2252021409862202,
 'Shounen': 0.21928026420681013,
 'Yaoi': 0.16427513950575104,
 'Samurai': 0.15516455984512015,
 'Fantasy': 0.146281744676005,
 'Martial Arts': 0.1424666894431158,
 'Shoujo': 0.12749117412595376,
 'Sports': 0.1260676460539802,
 'Ecchi': 0.12299282541851725,
 'Magic': 0.10511331283452909,
 'Historical': 0.10152602209315567,
 'Hentai': 0.08991003302585127,
 'Sci-Fi': 0.01958774627035645}

In [7]:
# Ratings in df_full are expected on a binary 1/2 scale (presumably set by the
# combine step upstream), hence rating_scale=(1, 2).
reader = Reader(rating_scale=(1,2))
data = Dataset.load_from_df(df_full, reader)

# Split dataset into 80% train and 20% test; the fixed seed keeps the split reproducible.
trainset, testset = train_test_split(data, test_size=0.2, random_state=7)

# User-based KNN with cosine similarity.
sim_options = {
    "user_based": True,  # compute similarities between users (not items)
    "name": "cosine",
}

# k is the maximum number of neighbours used per prediction (40 is also the library
# default); change it here to experiment. min_k is the minimum number of neighbours
# required for the aggregation.
algo = KNNBasic(sim_options=sim_options, k=40, min_k=5, verbose=False)
algo.fit(trainset)
predictions = algo.test(testset)

# accuracy.rmse / accuracy.mae already print the metric (verbose defaults to True),
# so wrapping them in print() displayed every value twice.
accuracy.rmse(predictions)
accuracy.mae(predictions)

# Binarise each estimate at 1.5 (midpoint of the 1-2 scale) and compare it with the true rating.
res = [(2 if pred.est > 1.5 else 1) == pred.r_ui for pred in predictions]
# Keep the Counter itself: unlike a plain dict it yields 0 for a missing True/False key,
# so an all-correct or all-wrong run no longer raises KeyError.
dict_res = Counter(res)
print('Accuracy:', (dict_res[True] / len(res)) if res else float('nan'))

RMSE: 0.2966
0.2965931913751708
MAE:  0.1366
0.13663688101920107
Accuracy: 0.8939559066140079


In [8]:
# Per-class precision / recall / F1 of the binarised predictions.
# True ratings are cast to int; estimates above 1.5 count as class 2, the rest as class 1.
true_labels = [int(pred.r_ui) for pred in predictions]
predicted_labels = [2 if pred.est>(1.5) else 1 for pred in predictions]
print(classification_report(true_labels, predicted_labels))

              precision    recall  f1-score   support

           1       0.58      0.05      0.09     10742
           2       0.90      1.00      0.94     89273

    accuracy                           0.89    100015
   macro avg       0.74      0.52      0.52    100015
weighted avg       0.86      0.89      0.85    100015



In [16]:
def get_recommendations(user:str, algo, df_users:pd.DataFrame, df_animes:pd.DataFrame, filter_genre:str=None, n_neighbors:int=10, n_items:int=10):
    """Recommend animes to `user` based on the ratings of their nearest neighbours.

    The nearest users are looked up with the trained algorithm, their ratings are
    joined to the anime catalogue, and the animes with the highest median rating
    among those neighbours are returned.

    Parameters
    ----------
    user : str
        Raw user id (name) as used when the algorithm was trained.
    algo
        Fitted algorithm exposing `trainset.to_inner_uid`, `trainset.to_raw_uid`
        and `get_neighbors(inner_id, k)`.
    df_users : pd.DataFrame
        Ratings with columns `user`, `item` and `rating`.
    df_animes : pd.DataFrame
        Anime catalogue with columns `MAL_ID` and `Genres_list` (list of genre names).
    filter_genre : str, optional
        When given (and non-empty), only animes whose `Genres_list` contains this
        genre are considered.
    n_neighbors : int, default 10
        Number of nearest users whose ratings are used.
    n_items : int, default 10
        Maximum number of animes returned.

    Returns
    -------
    pd.DataFrame
        One row per recommended anime: the median `rating` followed by the
        catalogue columns, ordered by decreasing median rating.

    Raises
    ------
    Whatever `algo.trainset.to_inner_uid` raises when `user` is unknown to the trainset.
    """
    trainset = algo.trainset
    user_inner_uid = trainset.to_inner_uid(user)

    # Raw ids (names) of the nearest users.
    neighbor_names = [trainset.to_raw_uid(inner_uid) for inner_uid in algo.get_neighbors(user_inner_uid, n_neighbors)]
    df_rec = pd.DataFrame(neighbor_names, columns=['user'])

    # Attach the neighbours' ratings, then the catalogue data of each rated anime.
    df_rec = pd.merge(df_rec, df_users, how='inner', on='user')
    df_rec = pd.merge(df_rec, df_animes, how='inner', left_on='item', right_on='MAL_ID')

    if filter_genre:
        # astype(bool) keeps the mask boolean even when df_rec is empty; an empty
        # object-dtype mask would otherwise be treated as a (empty) column selection.
        genre_mask = df_rec.Genres_list.map(lambda genres: filter_genre in genres).astype(bool)
        df_rec = df_rec[genre_mask]

    # Median rating per anime among the neighbours, best first, truncated to n_items.
    top_items = df_rec.groupby('item').rating.median().sort_values(ascending=False).head(n_items)

    # `item` is the index of top_items; join it back to the catalogue for the anime details.
    return pd.merge(top_items, df_animes, how='inner', left_on='item', right_on='MAL_ID')

In [17]:
# Recommendations for user 'maxime', restricted to animes whose genre list contains 'Military'.
get_recommendations('maxime', algo, df_full, df_animes, 'Military')

Unnamed: 0,rating,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,...,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1,Genres_list
0,2.0,33,Kenpuu Denki Berserk,8.49,"Action, Adventure, Demons, Drama, Fantasy, Hor...",Berserk,剣風伝奇ベルセルク,TV,25,"Oct 8, 1997 to Apr 1, 1998",...,65906.0,60815.0,29055.0,9477.0,3899.0,1748.0,671.0,456.0,842.0,"[Action, Adventure, Demons, Drama, Fantasy, Ho..."
1,2.0,10689,Appleseed XIII Remix Movie 1: Yuigon,6.41,"Action, Mecha, Military, Sci-Fi, Police",Unknown,劇場リミックス版 APPLESEED XIII ～遺言～,Movie,1,"Jun 13, 2011",...,55.0,121.0,271.0,200.0,118.0,49.0,19.0,14.0,18.0,"[Action, Mecha, Military, Sci-Fi, Police]"
2,2.0,8888,Code Geass: Boukoku no Akito 1 - Yokuryuu wa M...,7.41,"Action, Mecha, Military, Sci-Fi",Code Geass:Akito the Exiled - The Wyvern Arrives,コードギアス 亡国のアキト 第１章「翼竜は舞い降りた」,Movie,1,"Jul 16, 2012",...,9852.0,21416.0,23162.0,9427.0,3944.0,1496.0,638.0,297.0,248.0,"[Action, Mecha, Military, Sci-Fi]"
3,2.0,9135,Fullmetal Alchemist: The Sacred Star of Milos,7.32,"Action, Adventure, Comedy, Drama, Fantasy, Mag...",Fullmetal Alchemist:The Sacred Star of Milos,劇場版 鋼の錬金術師 嘆きの丘(ミロス)の聖なる星,Movie,1,"Jul 2, 2011",...,10797.0,20816.0,23630.0,11009.0,5045.0,2202.0,841.0,432.0,253.0,"[Action, Adventure, Comedy, Drama, Fantasy, Ma..."
4,2.0,9252,Break Blade 3: Kyoujin no Ato,7.81,"Action, Military, Fantasy, Mecha, Shounen",Broken Blade 3,ブレイク ブレイド 第三章 凶刃ノ痕,Movie,1,"Sep 25, 2010",...,6238.0,12366.0,8386.0,2463.0,920.0,219.0,85.0,53.0,35.0,"[Action, Military, Fantasy, Mecha, Shounen]"
5,2.0,9465,Break Blade 4: Sanka no Chi,7.87,"Action, Fantasy, Mecha, Military, Shounen",Broken Blade 4,ブレイク ブレイド 第四章 惨禍ノ地,Movie,1,"Oct 30, 2010",...,6676.0,11896.0,7701.0,2308.0,862.0,239.0,72.0,33.0,30.0,"[Action, Fantasy, Mecha, Military, Shounen]"
6,2.0,9724,Break Blade 5: Shisen no Hate,7.84,"Action, Military, Fantasy, Mecha, Shounen",Broken Blade 5,ブレイク ブレイド 死線ノ涯,Movie,1,"Jan 22, 2011",...,6238.0,11283.0,7466.0,2287.0,882.0,261.0,95.0,44.0,29.0,"[Action, Military, Fantasy, Mecha, Shounen]"
7,2.0,9737,Appleseed XIII,6.36,"Action, Mecha, Military, Police, Sci-Fi",Unknown,アップルシード XIII,OVA,13,"Jun 3, 2011 to Jan 25, 2012",...,209.0,471.0,939.0,837.0,526.0,309.0,132.0,80.0,83.0,"[Action, Mecha, Military, Police, Sci-Fi]"
8,2.0,10048,Fuyu no Semi Special,7.02,"Action, Military, Historical, Drama, Samurai, ...",Unknown,冬の蝉 ~特別編集版~,OVA,1,"Mar 26, 2008",...,177.0,250.0,363.0,243.0,128.0,42.0,18.0,19.0,46.0,"[Action, Military, Historical, Drama, Samurai,..."
9,2.0,10092,Break Blade 6: Doukoku no Toride,7.71,"Action, Military, Fantasy, Mecha, Shounen",Broken Blade 6,ブレイク ブレイド 第六章 慟哭ノ砦,Movie,1,"Mar 26, 2011",...,5406.0,10189.0,7851.0,2733.0,1063.0,394.0,138.0,59.0,41.0,"[Action, Military, Fantasy, Mecha, Shounen]"
