## Anime Recommender System

In [1]:
import numpy as np
import pandas as pd

In [2]:
### loading the anime catalogue and the user ratings from their CSV files
anime = pd.read_csv('anime.csv')
ratings = pd.read_csv('anime_rating.csv')

In [3]:
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [4]:
ratings.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


In [5]:
print(anime.shape)
print(ratings.shape)

(12294, 7)
(7813737, 3)


In [6]:
### to know the missing values
anime.isnull().sum()

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

In [7]:
ratings.isnull().sum()

user_id     0
anime_id    0
rating      0
dtype: int64

In [8]:
### to find duplicates
anime.duplicated().sum()

0

In [9]:
ratings.duplicated().sum()

1

## Popularity Based Recommender System

In [10]:
### goal: display the 50 anime with the highest average rating among the titles that have more than 250 ratings

In [11]:
### removing the unnecessary features in the anime dataset (the catalogue's own rating and members columns are dropped)
anime = anime[['anime_id', 'name', 'genre', 'type', 'episodes']]

In [12]:
### joining every rating row to its anime's catalogue details via anime_id
# inner join: rating rows whose anime_id is missing from the catalogue are dropped
ratings_with_name = pd.merge(anime, ratings, on = 'anime_id', how = 'inner')

In [13]:
ratings_with_name.head()

Unnamed: 0,anime_id,name,genre,type,episodes,user_id,rating
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,99,5
1,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,152,10
2,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,244,10
3,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,271,10
4,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,278,-1


In [14]:
ratings_with_name.shape

(7813727, 7)

In [15]:
### counting the rating rows recorded for each anime title
num_rating_df = (
    ratings_with_name
    .groupby('name')['rating']
    .count()
    .reset_index(name = 'num_rating')
)

In [16]:
num_rating_df

Unnamed: 0,name,num_rating
0,&quot;0&quot;,26
1,&quot;Aesop&quot; no Ohanashi yori: Ushi to Ka...,2
2,&quot;Bungaku Shoujo&quot; Kyou no Oyatsu: Hat...,782
3,&quot;Bungaku Shoujo&quot; Memoire,809
4,&quot;Bungaku Shoujo&quot; Movie,1535
...,...,...
11191,xxxHOLiC Kei,3413
11192,xxxHOLiC Movie: Manatsu no Yoru no Yume,2365
11193,xxxHOLiC Rou,1513
11194,xxxHOLiC Shunmuki,1974


In [17]:
### calculating the mean rating of each anime title
# NOTE(review): the ratings sample shown earlier contains -1 values and they are
# averaged in here like any other score. If -1 is a "no score given" marker
# rather than a real rating, it should be excluded first -- confirm against the
# dataset description.
avg_rating_df = (
    ratings_with_name
    .groupby('name')['rating']
    .mean()
    .reset_index(name = 'avg_rating')
)
avg_rating_df

Unnamed: 0,name,avg_rating
0,&quot;0&quot;,2.769231
1,&quot;Aesop&quot; no Ohanashi yori: Ushi to Ka...,0.000000
2,&quot;Bungaku Shoujo&quot; Kyou no Oyatsu: Hat...,5.774936
3,&quot;Bungaku Shoujo&quot; Memoire,6.155748
4,&quot;Bungaku Shoujo&quot; Movie,6.457980
...,...,...
11191,xxxHOLiC Kei,6.720774
11192,xxxHOLiC Movie: Manatsu no Yoru no Yume,6.313742
11193,xxxHOLiC Rou,6.403173
11194,xxxHOLiC Shunmuki,6.238602


In [18]:
### one row per title carrying both its rating count and its average rating
popularity_df = pd.merge(num_rating_df, avg_rating_df, on = 'name')
popularity_df

Unnamed: 0,name,num_rating,avg_rating
0,&quot;0&quot;,26,2.769231
1,&quot;Aesop&quot; no Ohanashi yori: Ushi to Ka...,2,0.000000
2,&quot;Bungaku Shoujo&quot; Kyou no Oyatsu: Hat...,782,5.774936
3,&quot;Bungaku Shoujo&quot; Memoire,809,6.155748
4,&quot;Bungaku Shoujo&quot; Movie,1535,6.457980
...,...,...,...
11191,xxxHOLiC Kei,3413,6.720774
11192,xxxHOLiC Movie: Manatsu no Yoru no Yume,2365,6.313742
11193,xxxHOLiC Rou,1513,6.403173
11194,xxxHOLiC Shunmuki,1974,6.238602


In [19]:
### keeping the anime with more than 250 ratings, then taking the 50 with the highest average rating
popular_df = (
    popularity_df
    .loc[popularity_df['num_rating'] > 250]
    .sort_values('avg_rating', ascending = False)
    .head(50)
)

In [20]:
popular_df

Unnamed: 0,name,num_rating,avg_rating
4948,Kimi no Na wa.,2199,8.297863
2994,Ginga Eiyuu Densetsu,903,8.239203
9355,Steins;Gate,19283,8.126796
2598,Fullmetal Alchemist: Brotherhood,24574,8.028933
3057,Gintama°,1386,7.95671
3936,Hunter x Hunter (2011),8575,7.924082
1519,Clannad: After Story,17854,7.835275
6550,Monster,4594,7.809099
3044,Gintama,4974,7.775231
1552,Code Geass: Hangyaku no Lelouch R2,24242,7.765943


In [21]:
### attaching genre / type / episodes to the top-rated titles
# Only the three popularity columns are taken from the left side, and the
# catalogue is reduced to one row per name (first occurrence) before merging.
# This yields the same rows as merging first and de-duplicating afterwards, but
# keeps the cell safe to re-run: previously a second run merged a frame that
# already had genre/type/episodes, producing genre_x/genre_y columns and a
# KeyError on the column selection.
popular_df = popular_df[['name', 'num_rating', 'avg_rating']].merge(
    anime[['name', 'genre', 'type', 'episodes']].drop_duplicates('name'),
    on = 'name',
)[['name', 'genre', 'type', 'episodes', 'num_rating', 'avg_rating']]
popular_df

Unnamed: 0,name,genre,type,episodes,num_rating,avg_rating
0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,2199,8.297863
1,Ginga Eiyuu Densetsu,"Drama, Military, Sci-Fi, Space",OVA,110,903,8.239203
2,Steins;Gate,"Sci-Fi, Thriller",TV,24,19283,8.126796
3,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,24574,8.028933
4,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,1386,7.95671
5,Hunter x Hunter (2011),"Action, Adventure, Shounen, Super Power",TV,148,8575,7.924082
6,Clannad: After Story,"Drama, Fantasy, Romance, Slice of Life, Supern...",TV,24,17854,7.835275
7,Monster,"Drama, Horror, Mystery, Police, Psychological,...",TV,74,4594,7.809099
8,Gintama,"Action, Comedy, Historical, Parody, Samurai, S...",TV,201,4974,7.775231
9,Code Geass: Hangyaku no Lelouch R2,"Action, Drama, Mecha, Military, Sci-Fi, Super ...",TV,25,24242,7.765943


## Collaborative Filtering Based Recommender System using Cosine Similarity

In [22]:
### keeping only the users who have more than 500 rating rows
x = ratings_with_name.groupby('user_id')['rating'].count().gt(500)
# x is a boolean Series indexed by user_id; selecting its True entries leaves the qualifying ids
filtered_users = x[x].index

In [23]:
filtered_users

Index([   17,    54,   201,   226,   271,   294,   342,   392,   446,   478,
       ...
       73272, 73286, 73340, 73356, 73362, 73378, 73395, 73408, 73499, 73502],
      dtype='int64', name='user_id', length=1843)

In [24]:
# rating rows written by the retained (most active) users only
filtered_rating = ratings_with_name.loc[ratings_with_name['user_id'].isin(filtered_users)]

In [25]:
filtered_rating

Unnamed: 0,anime_id,name,genre,type,episodes,user_id,rating
3,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,271,10
15,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,786,10
18,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,917,-1
38,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,1497,10
39,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,1504,10
...,...,...,...,...,...,...,...
7813722,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,39532,-1
7813723,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,48766,-1
7813724,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,60365,4
7813725,26081,Yasuji no Pornorama: Yacchimae!!,Hentai,Movie,1,27364,-1


In [26]:
### keeping only the anime titles that have at least 100 ratings from the retained users
n = filtered_rating.groupby('name')['rating'].count().ge(100)
# despite the variable name, filtered_books holds anime titles
filtered_books = n[n].index

In [27]:
filtered_books

Index(['&quot;Bungaku Shoujo&quot; Kyou no Oyatsu: Hatsukoi',
       '&quot;Bungaku Shoujo&quot; Memoire',
       '&quot;Bungaku Shoujo&quot; Movie', '.hack//G.U. Returner',
       '.hack//G.U. Trilogy', '.hack//G.U. Trilogy: Parody Mode',
       '.hack//Gift', '.hack//Intermezzo', '.hack//Liminality',
       '.hack//Quantum',
       ...
       'ef: A Tale of Memories.', 'ef: A Tale of Memories. - Prologue',
       'ef: A Tale of Memories. - Recollections', 'iDOLM@STER Xenoglossia',
       's.CRY.ed', 'xxxHOLiC', 'xxxHOLiC Kei',
       'xxxHOLiC Movie: Manatsu no Yoru no Yume', 'xxxHOLiC Rou',
       'xxxHOLiC Shunmuki'],
      dtype='object', name='name', length=3097)

In [28]:
# rating rows that survive both filters: retained user and retained title
final_rating = filtered_rating.loc[filtered_rating['name'].isin(filtered_books)]

In [29]:
final_rating

Unnamed: 0,anime_id,name,genre,type,episodes,user_id,rating
3,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,271,10
15,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,786,10
18,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,917,-1
38,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,1497,10
39,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,1504,10
...,...,...,...,...,...,...,...
7813054,1639,Boku no Pico,"Hentai, Yaoi",OVA,1,70585,5
7813095,1639,Boku no Pico,"Hentai, Yaoi",OVA,1,71790,10
7813096,1639,Boku no Pico,"Hentai, Yaoi",OVA,1,71792,1
7813143,1639,Boku no Pico,"Hentai, Yaoi",OVA,1,73135,2


In [30]:
### creating the title x user pivot table
# rows are anime names, columns are user ids, cells hold the rating; several rows
# for the same (name, user) pair are averaged (pivot_table's default aggregation)
# and pairs with no rating row are NaN
pt = pd.pivot_table(final_rating, index = 'name', columns = 'user_id', values = 'rating')

In [31]:
pt

user_id,17,54,201,226,271,294,342,392,446,478,...,73272,73286,73340,73356,73362,73378,73395,73408,73499,73502
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
&quot;Bungaku Shoujo&quot; Kyou no Oyatsu: Hatsukoi,,,,,,7.0,,,,9.0,...,,9.0,,,,,,,,10.0
&quot;Bungaku Shoujo&quot; Memoire,,,,,,8.0,,,,9.0,...,,9.0,,,,,8.0,,,
&quot;Bungaku Shoujo&quot; Movie,,,,,,8.0,,4.0,9.0,,...,,9.0,9.0,,8.0,,9.0,,,10.0
.hack//G.U. Returner,,,,,,,,-1.0,5.0,,...,,,,,,,,,,
.hack//G.U. Trilogy,,,,,,,,-1.0,7.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
xxxHOLiC,,,,,,7.0,,,8.0,,...,8.0,7.0,8.0,8.0,,,,-1.0,,10.0
xxxHOLiC Kei,,,,,,8.0,,,,,...,7.0,,,9.0,,,,-1.0,,10.0
xxxHOLiC Movie: Manatsu no Yoru no Yume,,,,,,7.0,,,,,...,8.0,-1.0,,8.0,,,,-1.0,,10.0
xxxHOLiC Rou,,,,,,,,,,,...,,,,,,,,,,


In [32]:
# missing (name, user) pairs become 0 so every title has a complete numeric vector
# NOTE(review): the table also contains -1 entries, which now rank below the 0
# used for "no rating row" -- confirm that ordering is intended.
pt = pt.fillna(0)
pt

user_id,17,54,201,226,271,294,342,392,446,478,...,73272,73286,73340,73356,73362,73378,73395,73408,73499,73502
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
&quot;Bungaku Shoujo&quot; Kyou no Oyatsu: Hatsukoi,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,9.0,...,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0
&quot;Bungaku Shoujo&quot; Memoire,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,9.0,...,0.0,9.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0
&quot;Bungaku Shoujo&quot; Movie,0.0,0.0,0.0,0.0,0.0,8.0,0.0,4.0,9.0,0.0,...,0.0,9.0,9.0,0.0,8.0,0.0,9.0,0.0,0.0,10.0
.hack//G.U. Returner,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.hack//G.U. Trilogy,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,7.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
xxxHOLiC,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,8.0,0.0,...,8.0,7.0,8.0,8.0,0.0,0.0,0.0,-1.0,0.0,10.0
xxxHOLiC Kei,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,...,7.0,0.0,0.0,9.0,0.0,0.0,0.0,-1.0,0.0,10.0
xxxHOLiC Movie: Manatsu no Yoru no Yume,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,...,8.0,-1.0,0.0,8.0,0.0,0.0,0.0,-1.0,0.0,10.0
xxxHOLiC Rou,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
from sklearn.metrics.pairwise import cosine_similarity

In [34]:
### getting the similarity between each pair of anime by calculating the cosine similarity of their rating vectors (rows of pt)
similarity_scores = cosine_similarity(pt)

In [35]:
similarity_scores

array([[1.        , 0.7864124 , 0.71393328, ..., 0.31009577, 0.27855329,
        0.30176683],
       [0.7864124 , 1.        , 0.78368607, ..., 0.30207142, 0.3013176 ,
        0.30978266],
       [0.71393328, 0.78368607, 1.        , ..., 0.30495718, 0.30673277,
        0.33038664],
       ...,
       [0.31009577, 0.30207142, 0.30495718, ..., 1.        , 0.68260123,
        0.74064448],
       [0.27855329, 0.3013176 , 0.30673277, ..., 0.68260123, 1.        ,
        0.86673757],
       [0.30176683, 0.30978266, 0.33038664, ..., 0.74064448, 0.86673757,
        1.        ]])

In [36]:
def recommend(anime_name, n = 5):
    """Print the titles most similar to ``anime_name`` by cosine similarity.

    The title is looked up in the notebook-level pivot table ``pt``; its row of
    ``similarity_scores`` is ranked from highest to lowest and the ``n`` best
    other titles are printed, one per line.

    Parameters
    ----------
    anime_name : str
        Title exactly as it appears in ``pt.index``.
    n : int, default 5
        Number of similar titles to print.

    Raises
    ------
    IndexError
        If ``anime_name`` is not present in ``pt.index``.
    """
    # fetching index value of the anime
    matches = np.where(pt.index == anime_name)[0]
    if len(matches) == 0:
        raise IndexError(f"anime {anime_name!r} not found in pt.index")
    index = matches[0]
    # fetching the similarity scores of that index
    distances = similarity_scores[index]
    # ranking every title by similarity, best first (ties keep pt's row order)
    ranked = sorted(enumerate(distances), key = lambda x: x[1], reverse = True)
    # dropping the queried title by its position instead of assuming it sorts
    # first: a title with an equal score could otherwise be skipped and the
    # query itself printed as a recommendation
    similar_items = [item for item in ranked if item[0] != index][:n]

    # printing the index names of those values
    for i in similar_items:
        print(pt.index[i[0]])

In [37]:
recommend('Kimi no Na wa.')

Shigatsu wa Kimi no Uso
Re:Zero kara Hajimeru Isekai Seikatsu
Boku dake ga Inai Machi
Kokoro ga Sakebitagatterunda.
ReLIFE


In [38]:
recommend('Great Teacher Onizuka')

Code Geass: Hangyaku no Lelouch
Death Note
Fullmetal Alchemist
Full Metal Panic!
Black Lagoon


In [39]:
recommend('Usagi Drop')

Ano Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai.
Toradora!
Steins;Gate
Suzumiya Haruhi no Yuuutsu
Ore no Imouto ga Konnani Kawaii Wake ga Nai


## Collaborative Filtering Using k-Nearest Neighbors (kNN)

In [40]:
### csr_matrix stores only the non-zero entries of the (mostly zero) pivot table, which saves memory and speeds up the distance computation
from scipy.sparse import csr_matrix

In [41]:
anime_sparse = csr_matrix(pt)

In [42]:
### importing NearestNeighbors, scikit-learn's unsupervised nearest-neighbour search (it looks up neighbours; it is not a clustering algorithm)
from sklearn.neighbors import NearestNeighbors

In [43]:
# No metric is passed, so scikit-learn's default (Minkowski with p=2, i.e.
# Euclidean distance) is used here rather than the cosine similarity of the
# previous section; algorithm='brute' asks for a brute-force search.
model = NearestNeighbors(algorithm = 'brute')

In [44]:
model.fit(anime_sparse)

In [45]:
### storing the anime name
anime_name = pt.index

In [46]:
import pickle

In [47]:
# Persisting the fitted model and the data the recommender needs.
# Each file is opened in a `with` block so the handle is flushed and closed even
# if pickling raises; the previous bare open(...) calls were never closed.
# NOTE(review): the first two artifacts are written under 'anime_artifacts/' and
# the last two under 'artifacts/' -- confirm the split is intentional. The paths
# are kept exactly as they were.
with open('anime_artifacts/model.pkl', 'wb') as artifact_file:
    pickle.dump(model, artifact_file)
with open('anime_artifacts/final_rating.pkl', 'wb') as artifact_file:
    pickle.dump(final_rating, artifact_file)
with open('artifacts/pt.pkl', 'wb') as artifact_file:
    pickle.dump(pt, artifact_file)
with open('artifacts/anime_name.pkl', 'wb') as artifact_file:
    pickle.dump(anime_name, artifact_file)

In [48]:
### creating the recommender function
def recommend_anime(anime_name, n_neighbors = 6):
    """Print the nearest neighbours of ``anime_name`` found by the fitted kNN ``model``.

    The title's row of the notebook-level pivot table ``pt`` is used as the
    query for ``model.kneighbors`` and the titles at the returned positions are
    printed one per line, in the order the model returns them. The model was
    fitted on ``pt`` itself, so the queried title is normally the first line
    printed (as in the recorded outputs).

    Parameters
    ----------
    anime_name : str
        Title exactly as it appears in ``pt.index``.
    n_neighbors : int, default 6
        Number of neighbours requested from the model.

    Raises
    ------
    IndexError
        If ``anime_name`` is not present in ``pt.index``.
    """
    matches = np.where(pt.index == anime_name)[0]
    if len(matches) == 0:
        raise IndexError(f"anime {anime_name!r} not found in pt.index")
    anime_index = matches[0]
    # kneighbors expects a 2-D query, hence the reshape to a single row
    query = pt.iloc[anime_index, :].values.reshape(1, -1)
    # the distances are not needed, only the positions of the neighbours
    _, suggestion = model.kneighbors(query, n_neighbors = n_neighbors)

    # suggestion holds one row of neighbour positions per query row
    for neighbour_positions in suggestion:
        for title in pt.index[neighbour_positions]:
            print(title)

In [49]:
recommend_anime('Kimi no Na wa.')

Kimi no Na wa.
Ame-iro Cocoa
Zutto Mae kara Suki deshita.: Kokuhaku Jikkou Iinkai
Himouto! Umaru-chanS
Battery
Bonjour♪Koiaji Pâtisserie


In [50]:
recommend_anime('Hunter x Hunter (2011)')

Hunter x Hunter (2011)
Hunter x Hunter Movie: Phantom Rouge
Hunter x Hunter Movie: The Last Mission
Kuroko no Basket 2nd Season
Hajime no Ippo: Rising
Baby Steps


In [51]:
recommend_anime('Fate/Zero 2nd Season')

Fate/Zero 2nd Season
Fate/Zero
Fate/stay night: Unlimited Blade Works
Fate/stay night: Unlimited Blade Works 2nd Season
Fate/stay night
Fate/stay night Movie: Unlimited Blade Works


In [52]:
recommend('Fate/Zero 2nd Season')

Fate/Zero
Fate/stay night: Unlimited Blade Works
Steins;Gate
Fate/stay night
Angel Beats!


In [53]:
recommend('One Punch Man')

No Game No Life
Shingeki no Kyojin
Kiseijuu: Sei no Kakuritsu
Sword Art Online
Dungeon ni Deai wo Motomeru no wa Machigatteiru Darou ka


In [54]:
recommend_anime('One Punch Man')

One Punch Man
Gate: Jieitai Kanochi nite, Kaku Tatakaeri
Dungeon ni Deai wo Motomeru no wa Machigatteiru Darou ka
Overlord
Boku no Hero Academia
Kiseijuu: Sei no Kakuritsu


In [55]:
recommend('Boku no Hero Academia')

One Punch Man
Koutetsujou no Kabaneri
Re:Zero kara Hajimeru Isekai Seikatsu
Noragami Aragoto
Shokugeki no Souma


In [56]:
recommend_anime('Boku no Hero Academia')

Boku no Hero Academia
Koutetsujou no Kabaneri
Mob Psycho 100
Shokugeki no Souma: Ni no Sara
Kiznaiver
Netoge no Yome wa Onnanoko ja Nai to Omotta?


In [57]:
recommend('Koutetsujou no Kabaneri')

Re:Zero kara Hajimeru Isekai Seikatsu
Hai to Gensou no Grimgar
Gate: Jieitai Kanochi nite, Kaku Tatakaeri
Boku no Hero Academia
Gate: Jieitai Kanochi nite, Kaku Tatakaeri 2nd Season


In [58]:
recommend('Re:Zero kara Hajimeru Isekai Seikatsu')

Kono Subarashii Sekai ni Shukufuku wo!
Charlotte
Boku dake ga Inai Machi
Gate: Jieitai Kanochi nite, Kaku Tatakaeri
Hai to Gensou no Grimgar


In [59]:
recommend('Kono Subarashii Sekai ni Shukufuku wo!')

Gate: Jieitai Kanochi nite, Kaku Tatakaeri
Gate: Jieitai Kanochi nite, Kaku Tatakaeri 2nd Season
Dungeon ni Deai wo Motomeru no wa Machigatteiru Darou ka
Overlord
Hai to Gensou no Grimgar


In [60]:
recommend('Monster')

Mushishi
Death Note
Baccano!
Great Teacher Onizuka
Black Lagoon


In [61]:
recommend('Fate/Zero')

Fate/Zero 2nd Season
Fate/stay night
Steins;Gate
Angel Beats!
Bakemonogatari


In [62]:
recommend('Neon Genesis Evangelion')

Neon Genesis Evangelion: The End of Evangelion
Evangelion: 1.0 You Are (Not) Alone
Suzumiya Haruhi no Yuuutsu
Tengen Toppa Gurren Lagann
Code Geass: Hangyaku no Lelouch


In [63]:
recommend('Boku no Pico')

Pico to Chico
Pico x CoCo x Chico
Sensitive Pornograph
Papa to Kiss in the Dark
Okane ga Nai
