## Installing Dependencies

In [1]:
!pip install surprise



## Importing Libraries

In [2]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from ast import literal_eval
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
from sklearn.preprocessing import LabelBinarizer, MultiLabelBinarizer, MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

import warnings; warnings.simplefilter('ignore')

In [3]:
anime_info_df = pd.read_csv('anime.csv')
anime_desc_df = pd.read_csv('anime_with_synopsis.csv')
rating_df = pd.read_csv('rating.csv')

In [4]:
anime_info_df.head(5)

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",Unknown,...,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Trigun,トライガン,TV,26,"Apr 1, 1998 to Sep 30, 1998",Spring 1998,...,50229.0,75651.0,86142.0,49432.0,15376.0,5838.0,1965.0,664.0,316.0,533.0
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),TV,26,"Jul 2, 2002 to Dec 24, 2002",Summer 2002,...,2182.0,4806.0,10128.0,11618.0,5709.0,2920.0,1083.0,353.0,164.0,131.0
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",Beet the Vandel Buster,冒険王ビィト,TV,52,"Sep 30, 2004 to Sep 29, 2005",Fall 2004,...,312.0,529.0,1242.0,1713.0,1068.0,634.0,265.0,83.0,50.0,27.0


In [5]:
anime_df = pd.merge(anime_desc_df,anime_info_df[['MAL_ID','Type','Popularity','Members','Favorites']],on='MAL_ID')
anime_df.head(10)

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis,Type,Popularity,Members,Favorites
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever...",TV,39,1251960,61971
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ...",Movie,518,273145,1174
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0...",TV,201,558913,12944
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...,TV,1467,94683,587
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...,TV,4369,13224,18
5,15,Eyeshield 21,7.95,"Action, Sports, Comedy, Shounen",Sena is like any other shy kid starting high s...,TV,1003,148259,2066
6,16,Hachimitsu to Clover,8.06,"Comedy, Drama, Josei, Romance, Slice of Life","Yuuta Takemoto, a sophomore at an arts college...",TV,687,214499,4101
7,17,Hungry Heart: Wild Striker,7.59,"Slice of Life, Comedy, Sports, Shounen",Kyosuke Kano has lived under the shadow of his...,TV,3612,20470,231
8,18,Initial D Fourth Stage,8.15,"Action, Cars, Sports, Drama, Seinen",Takumi Fujiwara finally joins Ryousuke and Kei...,TV,1233,117929,979
9,19,Monster,8.76,"Drama, Horror, Mystery, Police, Psychological,...","Dr. Kenzou Tenma, an elite neurosurgeon recent...",TV,169,614100,29436


In [6]:
anime_df = anime_df[(anime_df["Score"] != "Unknown") & ((anime_df["Type"] == "TV") | (anime_df["Type"] == "Movie")) ] 
anime_df.shape

(5848, 9)

## Content  Filtering

In [7]:
anime_df['sypnopsis'] = anime_df['sypnopsis'].fillna('')

In [8]:
tfidf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tfidf.fit_transform(anime_df['sypnopsis'])
tfidf_matrix.shape

(5848, 278437)

In [9]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
cosine_sim.shape

(5848, 5848)

In [10]:
anime_df = anime_df.reset_index()
titles = anime_df['Name']
indices = pd.Series(anime_df.index, index=anime_df['Name'])

In [11]:
def content_recommendations(title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:31]
    anime_indices = [i[0] for i in sim_scores]
    
    anime_lst = anime_df.iloc[anime_indices][['Name', 'Members', 'Score']]
    favorite_count = anime_lst[anime_lst['Members'].notnull()]['Members'].astype('int')
    score_avg = anime_lst[anime_lst['Score'].notnull()]['Score'].astype('float')
    C = score_avg.mean()
    m = favorite_count.quantile(0.60)
    qualified = anime_lst[(anime_lst['Members'] >= m) & (anime_lst['Members'].notnull()) & (anime_lst['Score'].notnull())]
    qualified['Members'] = qualified['Members'].astype('int')
    qualified['Score'] = qualified['Score'].astype('float')
    def weighted_rating(x):
        v = x['Members']
        R = x['Score']
        return (v/(v+m) * R) + (m/(m+v) * C)   
    
    qualified['wr'] = qualified.apply(weighted_rating, axis=1)
    qualified = qualified.sort_values('wr', ascending=False).head(10)
    
    return qualified

In [12]:
content_recommendations('Naruto').head(10)

Unnamed: 0,Name,Members,Score,wr
126,Mononoke Hime,876813,8.72,8.447186
1011,Naruto: Shippuuden,1543765,8.16,8.050984
623,Higurashi no Naku Koro ni,638491,7.95,7.766235
3495,The Last: Naruto the Movie,352160,7.76,7.541194
3326,Naruto: Shippuuden Movie 6 - Road to Ninja,223826,7.67,7.421472
4091,Boruto: Naruto the Movie,320603,7.5,7.360519
3445,Kamisama no Inai Nichiyoubi,190201,7.35,7.233971
1326,Naruto: Shippuuden Movie 1,211544,7.29,7.207811
2012,Naruto: Shippuuden Movie 2 - Kizuna,188680,7.29,7.2027
325,Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...,215046,7.1,7.104845


## Collaborative Filtering

In [13]:
rating_df.head(10)

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1
5,1,355,-1
6,1,356,-1
7,1,442,-1
8,1,487,-1
9,1,846,-1


In [14]:
reader = Reader()
rating_data = Dataset.load_from_df(rating_df, reader)
svd = SVD()

In [15]:
cross_validate(svd, rating_data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

In [16]:
trainset = rating_data.build_full_trainset()

In [17]:
svd.fit(trainset)

In [18]:
svd.predict(1, 356, -1)

In [19]:
import joblib
joblib.dump(svd,'svd.joblib')

In [21]:
# import joblib

# model = joblib.load('svd.joblib')

## Hybrid Filtering

In [142]:
id_map = anime_df[['MAL_ID']]
id_map['id'] = list(range(1,anime_df.shape[0]+1,1))
id_map = id_map.merge(anime_df[['MAL_ID', 'Name']], on='MAL_ID').set_index('Name')

In [143]:
indices_map = id_map.set_index('id')

In [144]:
def hybrid_recommendations(user_id,title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:31]
    anime_indices = [i[0] for i in sim_scores]
    
    anime_lst = anime_df.iloc[anime_indices][['MAL_ID','Name', 'Members', 'Score','Genres']]
    favorite_count = anime_lst[anime_lst['Members'].notnull()]['Members'].astype('int')
    score_avg = anime_lst[anime_lst['Score'].notnull()]['Score'].astype('float')
    C = score_avg.mean()
    m = favorite_count.quantile(0.60)
    qualified = anime_lst[(anime_lst['Members'] >= m) & (anime_lst['Members'].notnull()) & (anime_lst['Score'].notnull())]    
    qualified['Members'] = qualified['Members'].astype('int')
    qualified['Score'] = qualified['Score'].astype('float')
    def weighted_rating(x):
        v = x['Members']
        R = x['Score']
        return (v/(v+m) * R) + (m/(m+v) * C)   
    
    qualified['wr'] = qualified.apply(weighted_rating, axis=1)
    qualified = qualified.sort_values('wr', ascending=False).head(30)    
    
    qualified[['id']] = list(range(1,qualified.shape[0]+1,1))  
    qualified['est'] = qualified['id'].apply(lambda x: model.predict(user_id, indices_map.loc[x]['MAL_ID']).est)
    qualified = qualified.sort_values('est', ascending=False)
    result = qualified[['MAL_ID','Name','Genres','Score']]
    return result.head(10)    

In [145]:
hybrid_recommendations(1, 'Trigun')

Unnamed: 0,MAL_ID,Name,Genres,Score
5289,37345,Plunderer,"Action, Ecchi, Fantasy, Shounen",6.5
180,239,Gankutsuou,"Drama, Mystery, Sci-Fi, Supernatural, Thriller",8.17
1943,4106,Trigun: Badlands Rumble,"Action, Adventure, Comedy, Drama, Sci-Fi, Shounen",7.97
900,1535,Death Note,"Mystery, Police, Psychological, Supernatural, ...",8.63
2664,7785,Yojouhan Shinwa Taikei,"Mystery, Comedy, Psychological, Romance",8.61
24,43,Koukaku Kidoutai,"Action, Mecha, Police, Psychological, Sci-Fi, ...",8.29
1775,3588,Soul Eater,"Action, Fantasy, Comedy, Supernatural, Shounen",7.88
5604,39565,Boku no Hero Academia the Movie 2: Heroes:Rising,"Action, Super Power, Shounen",8.08
5090,35848,Promare,"Action, Mecha, Sci-Fi, Super Power",8.08
3232,12445,Tasogare Otome x Amnesia,"Horror, Mystery, Romance, School, Shounen, Sup...",7.86
