## Installing Dependencies

In [1]:
!pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Installing collected packages: surprise
Successfully installed surprise-0.1


## Importing Libraries

In [2]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

import warnings; warnings.simplefilter('ignore')

In [3]:
anime_info_df = pd.read_csv('../input/anime-recommendation-database-2020/anime.csv')
anime_desc_df = pd.read_csv('../input/anime-recommendation-database-2020/anime_with_synopsis.csv')
rating_df = pd.read_csv('../input/anime-recommendations-database/rating.csv')

In [4]:
anime_info_df.head(5)

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",Unknown,...,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Trigun,トライガン,TV,26,"Apr 1, 1998 to Sep 30, 1998",Spring 1998,...,50229.0,75651.0,86142.0,49432.0,15376.0,5838.0,1965.0,664.0,316.0,533.0
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),TV,26,"Jul 2, 2002 to Dec 24, 2002",Summer 2002,...,2182.0,4806.0,10128.0,11618.0,5709.0,2920.0,1083.0,353.0,164.0,131.0
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",Beet the Vandel Buster,冒険王ビィト,TV,52,"Sep 30, 2004 to Sep 29, 2005",Fall 2004,...,312.0,529.0,1242.0,1713.0,1068.0,634.0,265.0,83.0,50.0,27.0


In [5]:
anime_df = pd.merge(anime_desc_df,anime_info_df[['MAL_ID','Type','Popularity','Members','Favorites']],on='MAL_ID')
anime_df.head(10)

Unnamed: 0,MAL_ID,Name,Score,Genres,sypnopsis,Type,Popularity,Members,Favorites
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space","In the year 2071, humanity has colonized sever...",TV,39,1251960,61971
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space","other day, another bounty—such is the life of ...",Movie,518,273145,1174
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen","Vash the Stampede is the man with a $$60,000,0...",TV,201,558913,12944
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",ches are individuals with special powers like ...,TV,1467,94683,587
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",It is the dark century and the people are suff...,TV,4369,13224,18
5,15,Eyeshield 21,7.95,"Action, Sports, Comedy, Shounen",Sena is like any other shy kid starting high s...,TV,1003,148259,2066
6,16,Hachimitsu to Clover,8.06,"Comedy, Drama, Josei, Romance, Slice of Life","Yuuta Takemoto, a sophomore at an arts college...",TV,687,214499,4101
7,17,Hungry Heart: Wild Striker,7.59,"Slice of Life, Comedy, Sports, Shounen",Kyosuke Kano has lived under the shadow of his...,TV,3612,20470,231
8,18,Initial D Fourth Stage,8.15,"Action, Cars, Sports, Drama, Seinen",Takumi Fujiwara finally joins Ryousuke and Kei...,TV,1233,117929,979
9,19,Monster,8.76,"Drama, Horror, Mystery, Police, Psychological,...","Dr. Kenzou Tenma, an elite neurosurgeon recent...",TV,169,614100,29436


In [6]:
anime_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16214 entries, 0 to 16213
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   MAL_ID      16214 non-null  int64 
 1   Name        16214 non-null  object
 2   Score       16214 non-null  object
 3   Genres      16214 non-null  object
 4   sypnopsis   16206 non-null  object
 5   Type        16214 non-null  object
 6   Popularity  16214 non-null  int64 
 7   Members     16214 non-null  int64 
 8   Favorites   16214 non-null  int64 
dtypes: int64(4), object(5)
memory usage: 1.2+ MB


In [7]:
anime_df["Score"].describe()

count       16214
unique        532
top       Unknown
freq         5123
Name: Score, dtype: object

In [8]:
anime_df = anime_df[(anime_df["Score"] != "Unknown")] 
anime_df.shape

(11091, 9)

## Content  Filtering

In [9]:
anime_df['sypnopsis'] = anime_df['sypnopsis'].fillna('')

In [10]:
tfidf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')
tfidf_matrix = tfidf.fit_transform(anime_df['sypnopsis'])
tfidf_matrix.shape

(11091, 386042)

In [11]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
cosine_sim.shape

(11091, 11091)

In [12]:
anime_df = anime_df.reset_index()
titles = anime_df['Name']
indices = pd.Series(anime_df.index, index=anime_df['Name'])

In [13]:
def content_recommendations(title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:31]
    anime_indices = [i[0] for i in sim_scores]
    
    anime_lst = anime_df.iloc[anime_indices][['Name', 'Members', 'Score']]
    favorite_count = anime_lst[anime_lst['Members'].notnull()]['Members'].astype('int')
    score_avg = anime_lst[anime_lst['Score'].notnull()]['Score'].astype('float')
    C = score_avg.mean()
    m = favorite_count.quantile(0.60)
    qualified = anime_lst[(anime_lst['Members'] >= m) & (anime_lst['Members'].notnull()) & (anime_lst['Score'].notnull())]
    qualified['Members'] = qualified['Members'].astype('int')
    qualified['Score'] = qualified['Score'].astype('float')
    def weighted_rating(x):
        v = x['Members']
        R = x['Score']
        return (v/(v+m) * R) + (m/(m+v) * C)   
    
    qualified['wr'] = qualified.apply(weighted_rating, axis=1)
    qualified = qualified.sort_values('wr', ascending=False).head(10)
    
    return qualified

In [14]:
content_recommendations('Naruto').head(10)

Unnamed: 0,Name,Members,Score,wr
1506,Naruto: Shippuuden,1543765,8.16,8.051279
824,Higurashi no Naku Koro ni,638491,7.95,7.762043
5998,The Last: Naruto the Movie,352160,7.76,7.528764
5623,Naruto: Shippuuden Movie 6 - Road to Ninja,223826,7.67,7.400756
7245,Boruto: Naruto the Movie,320603,7.5,7.342222
4381,Naruto: Shippuuden Movie 4 - The Lost Tower,172051,7.42,7.231966
2089,Naruto: Shippuuden Movie 1,211544,7.29,7.178705
3145,Naruto: Shippuuden Movie 2 - Kizuna,188680,7.29,7.171518
403,Naruto Movie 1: Dai Katsugeki!! Yuki Hime Shin...,215046,7.1,7.072324
826,Naruto Movie 2: Dai Gekitotsu! Maboroshi no Ch...,172509,6.88,6.956515


## Collaborative Filtering

In [15]:
rating_df.head(10)

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1
5,1,355,-1
6,1,356,-1
7,1,442,-1
8,1,487,-1
9,1,846,-1


In [16]:
rating_df['rating'].value_counts()

 8     1646019
-1     1476496
 7     1375287
 9     1254096
 10     955715
 6      637775
 5      282806
 4      104291
 3       41453
 2       23150
 1       16649
Name: rating, dtype: int64

In [17]:
# rating_df['rating'] = rating_df['rating'].apply(lambda x: 1 if x < 0 else x)
rating_df = rating_df[(rating_df["rating"] != -1)] 

In [18]:
rating_df.head(5)

Unnamed: 0,user_id,anime_id,rating
47,1,8074,10
81,1,11617,10
83,1,11757,10
101,1,15451,10
153,2,11771,10


In [19]:
reader = Reader()
rating_data = Dataset.load_from_df(rating_df, reader)
svd = SVD()

In [20]:
trainset = rating_data.build_full_trainset()

In [21]:
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f6f82db1190>

In [22]:
svd.predict(1, 356, 5)

Prediction(uid=1, iid=356, r_ui=5, est=5, details={'was_impossible': False})

## Hybrid Filtering

In [23]:
id_map = anime_df[['MAL_ID']]
id_map['id'] = list(range(1,anime_df.shape[0]+1,1))
id_map = id_map.merge(anime_df[['MAL_ID', 'Name']], on='MAL_ID').set_index('Name')

In [24]:
indices_map = id_map.set_index('id')

In [25]:
def hybrid_recommendations(user_id,title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:31]    
    anime_indices = [i[0] for i in sim_scores]
            
    anime_lst = anime_df.iloc[anime_indices][['MAL_ID','Name', 'Members', 'Score','Genres']]
    favorite_count = anime_lst[anime_lst['Members'].notnull()]['Members'].astype('int')
    score_avg = anime_lst[anime_lst['Score'].notnull()]['Score'].astype('float')
    C = score_avg.mean()
    m = favorite_count.quantile(0.60)
    qualified = anime_lst[(anime_lst['Members'] >= m) & (anime_lst['Members'].notnull()) & (anime_lst['Score'].notnull())]    
    qualified['Members'] = qualified['Members'].astype('int')
    qualified['Score'] = qualified['Score'].astype('float')
    def weighted_rating(x):
        v = x['Members']
        R = x['Score']
        return (v/(v+m) * R) + (m/(m+v) * C)   
    
    qualified['wr'] = qualified.apply(weighted_rating, axis=1)
    qualified = qualified.sort_values('wr', ascending=False).head(30)    
    
    qualified[['id']] = list(range(1,qualified.shape[0]+1,1))  
    qualified['est'] = qualified['id'].apply(lambda x: svd.predict(user_id, indices_map.loc[x]['MAL_ID']).est)
    qualified = qualified.sort_values('est', ascending=False)
    result = qualified[['MAL_ID','Name','Genres','Score']]
    return result.head(10)    

In [26]:
hybrid_recommendations(8, 'Trigun')

Unnamed: 0,MAL_ID,Name,Genres,Score
4270,7785,Yojouhan Shinwa Taikei,"Mystery, Comedy, Psychological, Romance",8.61
9226,35848,Promare,"Action, Mecha, Sci-Fi, Super Power",8.08
3037,4106,Trigun: Badlands Rumble,"Action, Adventure, Comedy, Drama, Sci-Fi, Shounen",7.97
5700,14345,Btooom!,"Action, Sci-Fi, Psychological, Seinen",7.39
181,206,Lodoss-tou Senki: Eiyuu Kishi Den,"Action, Adventure, Drama, Magic, Romance, Fantasy",7.16
349,384,Gantz,"Action, Sci-Fi, Horror, Psychological, Superna...",7.05
658,732,Vampire Hunter D,"Action, Sci-Fi, Horror, Supernatural, Vampire",7.08
858,976,Yokohama Kaidashi Kikou: Quiet Country Cafe,"Sci-Fi, Slice of Life, Seinen",7.16
1692,1951,Manie-Manie: Meikyuu Monogatari,"Adventure, Fantasy, Horror, Sci-Fi, Supernatural",7.05
9536,36816,Godzilla 2: Kessen Kidou Zoushoku Toshi,"Action, Adventure, Sci-Fi",6.68
