# Anime Recommender

In [1]:
import numpy as np
import pandas as pd

import warnings

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

warnings.filterwarnings("always")
warnings.filterwarnings("ignore")

In [2]:
anime = pd.read_csv('Anime_data.csv')

In [3]:
anime.head(2)

Unnamed: 0,Anime_id,Title,Genre,Synopsis,Type,Producer,Studio,Rating,ScoredBy,Popularity,Members,Episodes,Source,Aired,Link
0,1,Cowboy Bebop,"['Action', 'Adventure', 'Comedy', 'Drama', 'Sc...","In the year 2071, humanity has colonized sever...",TV,['Bandai Visual'],['Sunrise'],8.81,363889.0,39.0,704490.0,26.0,Original,"Apr 3, 1998 to Apr 24, 1999",https://myanimelist.net/anime/1/Cowboy_Bebop
1,5,Cowboy Bebop: Tengoku no Tobira,"['Action', 'Space', 'Drama', 'Mystery', 'Sci-Fi']","Another day, another bounty—such is the life o...",Movie,"['Sunrise', 'Bandai Visual']",['Bones'],8.41,111187.0,475.0,179899.0,1.0,Original,"Sep 1, 2001",https://myanimelist.net/anime/5/Cowboy_Bebop__...


In [4]:
anime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17002 entries, 0 to 17001
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Anime_id    17002 non-null  int64  
 1   Title       17002 non-null  object 
 2   Genre       14990 non-null  object 
 3   Synopsis    15583 non-null  object 
 4   Type        16368 non-null  object 
 5   Producer    7635 non-null   object 
 6   Studio      7919 non-null   object 
 7   Rating      14425 non-null  float64
 8   ScoredBy    13227 non-null  float64
 9   Popularity  16368 non-null  float64
 10  Members     17002 non-null  float64
 11  Episodes    14085 non-null  float64
 12  Source      15075 non-null  object 
 13  Aired       16368 non-null  object 
 14  Link        16368 non-null  object 
dtypes: float64(5), int64(1), object(9)
memory usage: 1.9+ MB


In [5]:
anime = anime[['Title', 'Genre', 'Synopsis', 'Type']]

In [6]:
anime_df = anime
anime_df

Unnamed: 0,Title,Genre,Synopsis,Type
0,Cowboy Bebop,"['Action', 'Adventure', 'Comedy', 'Drama', 'Sc...","In the year 2071, humanity has colonized sever...",TV
1,Cowboy Bebop: Tengoku no Tobira,"['Action', 'Space', 'Drama', 'Mystery', 'Sci-Fi']","Another day, another bounty—such is the life o...",Movie
2,Trigun,"['Action', 'Sci-Fi', 'Adventure', 'Comedy', 'D...","Vash the Stampede is the man with a $$60,000,0...",TV
3,Witch Hunter Robin,"['Action', 'Magic', 'Police', 'Supernatural', ...",Witches are individuals with special powers li...,TV
4,Bouken Ou Beet,"['Adventure', 'Fantasy', 'Shounen', 'Supernatu...",It is the dark century and the people are suff...,TV
...,...,...,...,...
16997,Sore Ike! Anpanman: Anpanman to Kaizoku Lobster,,,Special
16998,X Bomber,,,TV
16999,X Bomber Pilot,,,Special
17000,Ganko-chan,,,


In [7]:
anime_df.isnull().sum().sort_values(ascending=False)

Genre       2012
Synopsis    1419
Type         634
Title          0
dtype: int64

In [8]:
print(anime_df['Synopsis'].mode()[0])
print(anime_df['Type'].mode()[0])

No synopsis has been added for this series yet. 
 
 Click here  to update this information.
TV


In [9]:
anime_df['Synopsis'] = anime_df['Synopsis'].fillna(
    anime_df['Synopsis'].dropna().mode().values[0]
)
anime_df['Type'] = anime_df['Type'].fillna(
    anime_df['Type'].dropna().mode().values[0]
)
anime_df['Genre'] = anime_df['Genre'].fillna(
    anime_df['Genre'].dropna().mode().values[0]
)

In [10]:
anime_df.isnull().sum()

Title       0
Genre       0
Synopsis    0
Type        0
dtype: int64

In [11]:
anime_df = anime_df[anime_df['Type']=='TV']

In [12]:
anime_df.drop('Type', axis=1, inplace=True)

In [13]:
indices = pd.Series(anime_df.index, index = anime_df['Title'])

In [14]:
tfidf = TfidfVectorizer(stop_words='english')
tfidf2 = TfidfVectorizer(stop_words='english')
count = CountVectorizer(stop_words='english')

In [15]:
import time
start=time.time()
tfidf_matrix = tfidf.fit_transform(anime_df['Title'])
count_matrix = count.fit_transform(anime_df['Genre'])
tfidf2_matrix = tfidf2.fit_transform(anime_df['Synopsis'])
end=time.time()
print(end-start)

2.382981300354004


In [16]:
import time
start=time.time()
name_similarity = cosine_similarity(tfidf_matrix)
genre_similarity = cosine_similarity(count_matrix)
synopsis_similarity = cosine_similarity(tfidf2_matrix)
end=time.time()
print(end-start)

2.7610018253326416


In [17]:
def get_recommendations(_anime):
    i = indices[_anime]
    
    name_score = list(enumerate(name_similarity[i]))
    genre_score = list(enumerate(genre_similarity[i]))
    synopsis_score = list(enumerate(synopsis_similarity[i]))
    
    name_score = sorted(name_score, key = lambda x: x[0])
    genre_score = sorted(genre_score, key = lambda x: x[0])
    synopsis_score = sorted(synopsis_score, key = lambda x: x[0])
    
    combined_score = [(i, (sc_1 + sc_2 + sc_3) / 3) for (i, sc_1), (_, sc_2), (_, sc_3) in zip(name_score, genre_score, synopsis_score)]
    
    combined_score = sorted(combined_score, key = lambda x: x[1], reverse = True)
    
    anime_recs = []
    
    index = 0
    while len(anime_recs) != 10:
        anime_id = combined_score[1:][index][0]
        index += 1
        if _anime in indices.iloc[[anime_id]].index[0]:
            continue
        else:
            anime_recs.append(indices.iloc[[anime_id]].index[0])
    
    
    print(f'If you liked {_anime}, you should try:')
    for i, v in list(enumerate(anime_recs)):
        print(f'{i + 1}. {v}')

In [18]:
import time
start=time.time()
get_recommendations('Dragon Ball')
end=time.time()
print(end-start)

If you liked Dragon Ball, you should try:
1. Seikai no Monshou
2. Seikai no Senki
3. Uchuu no Stellvia
4. Ginga Eiyuu Densetsu: Die Neue These - Kaikou
5. AWOL
6. Mobile Suit Gundam SEED
7. Macross
8. Mobile Suit Gundam
9. Mobile Suit Gundam SEED Destiny
10. Macross F
0.12200427055358887
