Anime Recommendation System


Importing the dependencies

In [2]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

Data Collection and Pre-Processing

In [6]:
# loading the data from the csv file to apandas dataframe
anime_data = pd.read_csv("myAnimeData.csv")

In [7]:
# printing the first 5 rows of the dataframe
anime_data.head()

Unnamed: 0,animeID,name,premiered,genre,type,episodes,producer,licensor,studio,source,scored,scoredBy,members
0,1,Cowboy Bebop,Spring 1998,"['Action', 'Adventure', 'Comedy', 'Drama', 'Sc...",TV,26.0,['Bandai Visual'],"['Funimation', 'Bandai Entertainment']",['Sunrise'],Original,8.81,363889.0,704490
1,5,Cowboy Bebop: Tengoku no Tobira,,"['Action', 'Space', 'Drama', 'Mystery', 'Sci-Fi']",Movie,1.0,"['Sunrise', 'Bandai Visual']",['Sony Pictures Entertainment'],['Bones'],Original,8.41,111187.0,179899
2,6,Trigun,Spring 1998,"['Action', 'Sci-Fi', 'Adventure', 'Comedy', 'D...",TV,26.0,['Victor Entertainment'],"['Funimation', 'Geneon Entertainment USA']",['Madhouse'],Manga,8.31,197451.0,372709
3,7,Witch Hunter Robin,Summer 2002,"['Action', 'Magic', 'Police', 'Supernatural', ...",TV,26.0,['Bandai Visual'],"['Funimation', 'Bandai Entertainment']",['Sunrise'],Original,7.34,31875.0,74889
4,8,Beet the Vandel Buster,Fall 2004,"['Adventure', 'Fantasy', 'Shounen', 'Supernatu...",TV,52.0,[],[],['Toei Animation'],Manga,7.04,4757.0,11247


In [8]:
# number of rows and columns in the data frame

anime_data.shape

(14966, 13)

In [10]:
# selecting the relevant features for recommendation

selected_features = ['name','genre','producer','licensor','studio']
print(selected_features)

['name', 'genre', 'producer', 'licensor', 'studio']


In [11]:
# replacing the null valuess with null string

for feature in selected_features:
  anime_data[feature] = movies_data[feature].fillna('')

In [12]:
# combining all the 5 selected features

combined_features = anime_data['name']+' '+anime_data['genre']+' '+anime_data['producer']+' '+anime_data['licensor']+' '+anime_data['studio']

In [13]:
print(combined_features)

0        Cowboy Bebop ['Action', 'Adventure', 'Comedy',...
1        Cowboy Bebop: Tengoku no Tobira ['Action', 'Sp...
2        Trigun ['Action', 'Sci-Fi', 'Adventure', 'Come...
3        Witch Hunter Robin ['Action', 'Magic', 'Police...
4        Beet the Vandel Buster ['Adventure', 'Fantasy'...
                               ...                        
14961    Touhou Gensou Mangekyou ['Action', 'Fantasy'] ...
14962    Touhou Gensou Mangekyou ['Action', 'Fantasy'] ...
14963    Meguru @ Love ['Action', 'Comedy', 'Drama', 'E...
14964    Choujigen Game Neptune: Hi☆Light ['Action', 'C...
14965    Gibiate ['Action', 'Fantasy', 'Horror', 'Marti...
Length: 14966, dtype: object


In [14]:
# converting the text data to feature vectors

vectorizer = TfidfVectorizer()

In [15]:
feature_vectors = vectorizer.fit_transform(combined_features)

Cosine Similarity

In [16]:
# getting the similarity scores using cosine similarity

similarity = cosine_similarity(feature_vectors)

In [17]:
print(similarity)

[[1.         0.67797289 0.23476476 ... 0.05222821 0.07635479 0.02274347]
 [0.67797289 1.         0.127971   ... 0.07246435 0.05303605 0.01867868]
 [0.23476476 0.127971   1.         ... 0.05505412 0.08048612 0.02397405]
 ...
 [0.05222821 0.07246435 0.05505412 ... 1.         0.08417083 0.04939897]
 [0.07635479 0.05303605 0.08048612 ... 0.08417083 1.         0.04894165]
 [0.02274347 0.01867868 0.02397405 ... 0.04939897 0.04894165 1.        ]]


Getting the animation name from the user

In [18]:
# getting the animation name from the user

anime_name = input(' Enter your favourite movie name : ')

 Enter your favourite movie name : naruto


In [21]:
# creating a list with all the animation names given in the dataset

list_of_all_titles = anime_data['name'].tolist()
print(list_of_all_titles)

['Cowboy Bebop', 'Cowboy Bebop: Tengoku no Tobira', 'Trigun', 'Witch Hunter Robin', 'Beet the Vandel Buster', 'Eyeshield 21', 'Hachimitsu to Clover', 'Hungry Heart: Wild Striker', 'Initial D Fourth Stage', 'Monster', 'Naruto', 'One Piece', 'Tennis no Ouji-sama', 'Ring ni Kakero 1', 'School Rumble', 'Sunabouzu', 'Texhnolyze', 'Trinity Blood', 'Yakitate!! Japan', 'Zipang', 'Neon Genesis Evangelion', 'Neon Genesis Evangelion: Death & Rebirth', 'Neon Genesis Evangelion: The End of Evangelion', 'Kenpuu Denki Berserk', 'Ghost in the Shell', 'Rurouni Kenshin: Meiji Kenkaku Romantan - Tsuioku-hen', 'Rurouni Kenshin: Meiji Kenkaku Romantan', 'Rurouni Kenshin: Meiji Kenkaku Romantan - Ishinshishi e no Chinkonka', 'Akira', '.hack//Sign', 'Aa! Megami-sama!', 'Aa! Megami-sama! (TV)', 'Tenshi Kinryouku', 'Kidou Tenshi Angelic Layer', 'Ai Yori Aoshi', 'Appleseed (Movie)', 'Arc the Lad', 'Avenger', 'Beck', 'Blue Gender', 'Chobits', 'Chrno Crusade', 'D.N.Angel', 'D.C.: Da Capo', 'DearS', 'Rozen Maiden'

In [23]:
# finding the close match for the animation name given by the user

find_close_match = difflib.get_close_matches(anime_name, list_of_all_titles)
print(find_close_match)

['Naruto', 'Haruwo']


In [24]:
close_match = find_close_match[0]
print(close_match)

Naruto


In [27]:
# finding the index of the animation with title

index_of_the_anime = anime_data[anime_data.name == close_match]['animeID'].values[0]
print(index_of_the_anime)

20


In [29]:
# getting a list of similar movies

similarity_score = list(enumerate(similarity[index_of_the_anime]))
print(similarity_score)

[(0, 0.061438085606387284), (1, 0.05045766012224259), (2, 0.06476231699610174), (3, 0.02659152261663198), (4, 0.0), (5, 0.26999368223531545), (6, 0.039355636177737775), (7, 0.0), (8, 0.025354865319297613), (9, 0.05729387641615491), (10, 0.07458716769328631), (11, 0.05165385566117412), (12, 0.2149480185394144), (13, 0.015521746465928694), (14, 0.0712429048865423), (15, 0.04406568184699067), (16, 0.10931595900775373), (17, 0.01581536252386072), (18, 0.04984641976237937), (19, 0.061747462782828846), (20, 1.0), (21, 0.6531253542318555), (22, 0.7210443895836868), (23, 0.020060532139039033), (24, 0.14455622490427544), (25, 0.08065653406309997), (26, 0.026951083016358186), (27, 0.08758046670689543), (28, 0.06492992537362444), (29, 0.024283331623435912), (30, 0.0), (31, 0.024192336375652997), (32, 0.2070103195427696), (33, 0.1698040673829544), (34, 0.032948523293168025), (35, 0.0626967770630659), (36, 0.12291354313103418), (37, 0.06673713637953228), (38, 0.014796635658373379), (39, 0.068555905

In [30]:
len(similarity_score)

14966

In [31]:
# sorting the movies based on their similarity score

sorted_similar_anime = sorted(similarity_score, key = lambda x:x[1], reverse = True) 
print(sorted_similar_anime)

[(20, 1.0), (22, 0.7210443895836868), (21, 0.6531253542318555), (14077, 0.5294899128122678), (14023, 0.4629572364845472), (14024, 0.4629572364845472), (14025, 0.4629572364845472), (2752, 0.3762420699067332), (195, 0.3555588059000729), (779, 0.34511171597668683), (898, 0.3388866414913535), (3159, 0.32684855984750144), (1915, 0.3186597774563065), (1685, 0.2982633785012911), (422, 0.2936742775997124), (132, 0.2908388472991843), (1073, 0.2884880073953667), (194, 0.28086484084130997), (2022, 0.2741385270877717), (2999, 0.2707053032832521), (5, 0.26999368223531545), (1394, 0.2674925380988964), (669, 0.2634475082646235), (1554, 0.26321327551286594), (51, 0.2629760875921184), (1199, 0.2619486142330416), (4820, 0.2617011742557032), (11438, 0.2573636407575208), (6351, 0.25727147054083116), (857, 0.2559920088129637), (9982, 0.2531001868167441), (1296, 0.25172969230005415), (796, 0.24947757990668692), (143, 0.24687104324542533), (166, 0.24609912200519476), (6365, 0.24565564895891534), (11389, 0.24

In [33]:
# print the name of similar anime based on the index

print('Movies suggested for you : \n')

i = 1

for movie in sorted_similar_anime:
  index = movie[0]
  name_from_index = anime_data[anime_data.index==index]['name'].values[0]
  if (i<30):
    print(i, '.',name_from_index)
    i+=1

Movies suggested for you : 

1 . Neon Genesis Evangelion
2 . Neon Genesis Evangelion: The End of Evangelion
3 . Neon Genesis Evangelion: Death & Rebirth
4 . Neon Genesis Evangelion Music DVD
5 . Neon Genesis Evangelion: Director's Cut
6 . Neon Genesis Evangelion: Director's Cut
7 . Neon Genesis Evangelion: Director's Cut
8 . Shin Hurricane Polymar
9 . Kidou Senkan Nadesico: The Prince of Darkness
10 . Blue Seed
11 . Medarot
12 . Medarot Damashii
13 . Akihabara Dennou-gumi
14 . Five Star Stories
15 . Keroro Gunsou
16 . Shaman King
17 . A.D. Police (TV)
18 . Kidou Senkan Nadesico
19 . Keroro Gunsou Movie 2: Shinkai no Princess de Arimasu!
20 . Evangelion: 3.0+1.0
21 . Eyeshield 21
22 . Rockman.EXE
23 . Dragon Half
24 . Masou Kishin Cybuster
25 . Full Metal Panic!
26 . Kodomo no Omocha (TV)
27 . Kotencotenco
28 . Yu☆Gi☆Oh! VRAINS
29 . Ranpou


Movie Recommendation Sytem

In [38]:
anime_name = input(' Enter your favourite movie name : ')

list_of_all_names = anime_data['name'].tolist()

find_close_match = difflib.get_close_matches(anime_name, list_of_all_titles)

close_match = find_close_match[0]

index_of_the_anime = anime_data[anime_data.name == close_match]['animeID'].values[0]

similarity_score = list(enumerate(similarity[index_of_the_anime]))

sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True) 

print('Movies suggested for you : \n')

i = 1

for movie in sorted_similar_movies:
  index = movie[0]
  name_from_index = anime_data[anime_data.index==index]['name'].values[0]
  if (i<10):
    print(i, '.',name_from_index)
    i+=1

 Enter your favourite movie name : naruto
Movies suggested for you : 

1 . Neon Genesis Evangelion
2 . Neon Genesis Evangelion: The End of Evangelion
3 . Neon Genesis Evangelion: Death & Rebirth
4 . Neon Genesis Evangelion Music DVD
5 . Neon Genesis Evangelion: Director's Cut
6 . Neon Genesis Evangelion: Director's Cut
7 . Neon Genesis Evangelion: Director's Cut
8 . Shin Hurricane Polymar
9 . Kidou Senkan Nadesico: The Prince of Darkness
