In [103]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy import stats
import seaborn as sns 

%matplotlib inline

In [104]:
# Load the anime catalog and the per-user ratings from the local data folder.
anime = pd.read_csv('./data/anime.csv')
rating = pd.read_csv('./data/rating.csv')

In [105]:
# Preview the first rows of the anime catalog.
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [106]:
# Preview the first rows of the user ratings.
rating.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


In [107]:
# Report (rows, columns) of the ratings table and of the catalog.
print(rating.shape, anime.shape)

(7813737, 3) (12294, 7)


In [108]:
# Average rating given by each user, as a one-column frame indexed by user_id.
# NOTE(review): ratings of -1 are averaged in as-is; they look like a
# "no score given" placeholder, which would drag the means down -- confirm
# against the dataset description before relying on these values.
means = rating.groupby('user_id')[['rating']].mean()

In [109]:
# Preview the per-user average ratings.
means.head()

Unnamed: 0_level_0,rating
user_id,Unnamed: 1_level_1
1,-0.712418
2,2.666667
3,7.382979
4,-1.0
5,4.263383


In [110]:
#rating = pd.merge(rating, means, on='user_id')

In [111]:
# Re-inspect the ratings; with the merge above commented out the frame is unchanged.
rating.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


In [112]:
#rating = rating.drop(rating[rating['rating_x'] < rating['rating_y']].index)

In [113]:
# Re-check the table sizes; the row filter above is commented out, so nothing was dropped.
print(rating.shape, anime.shape)

(7813737, 3) (12294, 7)


In [114]:
# Attach every user rating to its catalog row (inner join on anime_id).
# Both inputs carry a `rating` column, so the result exposes them as
# `rating_x` (from the catalog) and `rating_y` (from the user ratings).
data = anime.merge(rating, on='anime_id')

In [115]:
# Preview the merged catalog + ratings frame.
data.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating_x,members,user_id,rating_y
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,99,5
1,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,152,10
2,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,244,10
3,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,271,10
4,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,278,-1


In [116]:
# Clean the episode counts: "Unknown" marks a missing value.
# The column is converted to numbers *before* the median is taken, because a
# median over a column of numeric strings is rejected by recent pandas.
anime["episodes"] = pd.to_numeric(anime["episodes"].replace("Unknown", np.nan))
# Assign the filled column back instead of calling fillna(inplace=True) on the
# selected column: the in-place form on a selection is a chained update that
# does not reach the parent frame once pandas Copy-on-Write is enabled.
anime["episodes"] = anime["episodes"].fillna(anime["episodes"].median())
anime.describe()

Unnamed: 0,anime_id,episodes,rating,members
count,12294.0,12294.0,12064.0,12294.0
mean,14058.221653,12.095412,6.473902,18071.34
std,11455.294701,46.244062,1.026746,54820.68
min,1.0,1.0,1.67,5.0
25%,3484.25,1.0,5.88,225.0
50%,10260.5,2.0,6.57,1550.0
75%,24794.5,12.0,7.18,9437.0
max,34527.0,1818.0,10.0,1013917.0


In [117]:
# One indicator column per genre: the `genre` strings are split on ", " and
# each distinct genre becomes a 0/1 column.
genre_features = anime['genre'].str.get_dummies(sep=', ')
print(genre_features.shape)
genre_features.head()

(12294, 43)


Unnamed: 0,Action,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,...,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Thriller,Vampire,Yaoi,Yuri
0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,1,1,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [118]:
# Pairwise cosine similarity between the genre indicator rows:
# cosine_matrix[i, j] compares catalog rows i and j.
# NOTE: with the 12294 rows reported above this is a 12294 x 12294 matrix,
# so expect this cell to be memory-hungry.
cosine_matrix = cosine_similarity(genre_features)

In [119]:
def recommend_anime(anime_index, top_n=5, catalog=None, similarity=None):
    """Recommend the titles closest in genre to one anime, weighted by rating.

    Every title is scored as ``rating * similarity to the reference title``.
    The reference title is removed and the best-scoring rows are returned.

    Parameters
    ----------
    anime_index : int or label
        Index label of the reference title in ``catalog``.
    top_n : int, default 5
        Number of recommendations to return.
    catalog : pandas.DataFrame, optional
        Frame with ``name`` and ``rating`` columns. Defaults to the
        notebook-level ``anime`` frame.
    similarity : array-like, optional
        Square matrix whose row ``i`` holds the similarities of the ``i``-th
        catalog row to every catalog row. Defaults to the notebook-level
        ``cosine_matrix``.

    Returns
    -------
    pandas.DataFrame
        Columns ``name`` and ``cos_score``, sorted by descending
        ``cos_score`` (titles without a rating sort last), at most ``top_n``
        rows, indexed by the catalog labels.

    Raises
    ------
    KeyError
        If ``anime_index`` is not a label of ``catalog``.
    """
    if catalog is None:
        catalog = anime
    if similarity is None:
        similarity = cosine_matrix

    # The similarity matrix is positional while the catalog is label-indexed;
    # resolve the label once so both stay in sync (and unknown labels fail
    # here with a KeyError rather than picking an unrelated matrix row).
    position = catalog.index.get_loc(anime_index)
    cos_sim = pd.Series(np.asarray(similarity)[position], index=catalog.index)

    scored = pd.DataFrame({
        'name': catalog['name'],
        'cos_score': catalog['rating'] * cos_sim,
    })
    return (scored.drop(index=anime_index)
                  .sort_values(by='cos_score', ascending=False)
                  .head(top_n))

In [145]:
# Titles to seed the recommendations with; they must match `anime.name` exactly.
my_favorite_animes = [
    'Tengen Toppa Gurren Lagann',
    'JoJo no Kimyou na Bouken',
    'No Game No Life',
    'Death Note',
]

In [146]:
# Index labels of the catalog rows whose name is one of the favourite titles.
anime_list_indices = anime[anime['name'].isin(my_favorite_animes)].index

In [152]:
# Gather recommendations for every favourite title into one flat list.
recom = []
for index in anime_list_indices:
    output = recommend_anime(index)['name'].tolist()
    # NOTE(review): only the 3rd-5th ranked titles are kept, skipping the two
    # best-scoring ones -- presumably to avoid direct sequels/spin-offs of the
    # favourite itself; confirm this is intended.
    recom.extend(output[2:5])
print(recom)

['Tengen Toppa Gurren Lagann: Ore no Gurren wa Pikka-Pika!!', 'Suisei no Gargantia', 'Saber Marionette J to X', 'Mousou Dairinin', 'Higurashi no Naku Koro ni', 'Jigoku Shoujo Mitsuganae', 'Tonari no Totoro', 'Golden Boy', 'Nanatsu no Taizai', 'Dragon Ball Z Special 2: Zetsubou e no Hankou!! Nokosareta Chousenshi - Gohan to Trunks', 'Tegamibachi', 'Sen to Chihiro no Kamikakushi']
