In [1]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

Data Collection and Pre-Processing

In [2]:
# loading the data from the csv file to apandas dataframe
anime_data= pd.read_csv('anime.csv')

In [3]:
anime_data.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [4]:
anime_data['index'] = anime_data.index

In [5]:
anime_data.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,index
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630,0
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,1
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262,2
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572,3
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266,4


In [6]:
# number of rows and columns in the data frame

anime_data.shape

(12294, 8)

In [7]:
# selecting the relevant features for recommendation

selected_features = ['genre','name','rating','episodes']
print(selected_features)

['genre', 'name', 'rating', 'episodes']


In [8]:
# replacing the null valuess with null string

for feature in selected_features:
  anime_data[feature] = anime_data[feature].fillna('')

In [9]:
# combining all the 5 selected features

combined_features = anime_data['genre']

In [10]:
print(combined_features)

0                     Drama, Romance, School, Supernatural
1        Action, Adventure, Drama, Fantasy, Magic, Mili...
2        Action, Comedy, Historical, Parody, Samurai, S...
3                                         Sci-Fi, Thriller
4        Action, Comedy, Historical, Parody, Samurai, S...
                               ...                        
12289                                               Hentai
12290                                               Hentai
12291                                               Hentai
12292                                               Hentai
12293                                               Hentai
Name: genre, Length: 12294, dtype: object


In [11]:
# converting the text data to feature vectors

vectorizer = TfidfVectorizer()

feature_vectors = vectorizer.fit_transform(combined_features)

In [12]:
print(feature_vectors)

  (0, 42)	0.544416168413049
  (0, 33)	0.5189547975465538
  (0, 31)	0.49038698907432837
  (0, 8)	0.4402471517460214
  (1, 37)	0.35098726333369934
  (1, 23)	0.5215484702178532
  (1, 20)	0.44963166999178716
  (1, 10)	0.3196092910272099
  (1, 1)	0.3176066463834324
  (1, 0)	0.29464923376142327
  (1, 8)	0.33583365500987794
  (2, 11)	0.2829751128348767
  (2, 34)	0.2829751128348767
  (2, 32)	0.5507572919931327
  (2, 27)	0.4480162264049302
  (2, 15)	0.3788680221180788
  (2, 5)	0.20076598368966772
  (2, 37)	0.2985531079974074
  (2, 0)	0.25063144363985657
  (3, 43)	0.8337686771680168
  (3, 11)	0.3904035047060254
  (3, 34)	0.3904035047060254
  (4, 11)	0.2829751128348767
  (4, 34)	0.2829751128348767
  (4, 32)	0.5507572919931327
  :	:
  (12275, 14)	1.0
  (12276, 14)	1.0
  (12277, 14)	0.8633765665410634
  (12277, 5)	0.5045601097468614
  (12278, 14)	0.5075218179063449
  (12278, 19)	0.4974674543287694
  (12278, 26)	0.4974674543287694
  (12278, 38)	0.4974674543287694
  (12279, 14)	1.0
  (12280, 14)	1.0


In [13]:
# getting the similarity scores using cosine similarity

similarity = cosine_similarity(feature_vectors)

In [14]:
print(similarity)

[[1.         0.14784981 0.         ... 0.         0.         0.        ]
 [0.14784981 1.         0.1786367  ... 0.         0.         0.        ]
 [0.         0.1786367  1.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 1.         1.         1.        ]
 [0.         0.         0.         ... 1.         1.         1.        ]
 [0.         0.         0.         ... 1.         1.         1.        ]]


In [15]:
print(similarity.shape)

(12294, 12294)


In [16]:
# getting the anime name from the user

anime_name = input(' Enter your favourite anime name : ')

 Enter your favourite anime name : naruto


In [17]:
#creating a list with all the anime names given in the dataset

list_of_all_titles = anime_data['name'].tolist()
print(list_of_all_titles)

['Kimi no Na wa.', 'Fullmetal Alchemist: Brotherhood', 'Gintama°', 'Steins;Gate', 'Gintama&#039;', 'Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou', 'Hunter x Hunter (2011)', 'Ginga Eiyuu Densetsu', 'Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare', 'Gintama&#039;: Enchousen', 'Clannad: After Story', 'Koe no Katachi', 'Gintama', 'Code Geass: Hangyaku no Lelouch R2', 'Haikyuu!! Second Season', 'Sen to Chihiro no Kamikakushi', 'Shigatsu wa Kimi no Uso', 'Mushishi Zoku Shou 2nd Season', 'Ookami Kodomo no Ame to Yuki', 'Code Geass: Hangyaku no Lelouch', 'Hajime no Ippo', 'Rurouni Kenshin: Meiji Kenkaku Romantan - Tsuioku-hen', 'Cowboy Bebop', 'One Punch Man', 'Mononoke Hime', 'Suzumiya Haruhi no Shoushitsu', 'Monogatari Series: Second Season', 'Mushishi Zoku Shou', 'Mushishi', 'Tengen Toppa Gurren Lagann', 'Great Teacher Onizuka', 'Natsume Yuujinchou Go', 'Hajime no Ippo: New Challenger', 'Mushishi Zoku Shou: Suzu no Shizuku', 'Natsume Yuujinchou Shi', 'Howl no Ugoku Shiro',

In [18]:
# finding the close match for the anime name given by the user

find_close_match = difflib.get_close_matches(anime_name, list_of_all_titles)
print(find_close_match)

['Naruto', 'Haruwo']


In [19]:
close_match = find_close_match[0]
print(close_match)


Naruto


In [20]:
# finding the index of the anime with title

index_of_the_anime = anime_data[anime_data.name == close_match]['index'].values[0]
print(index_of_the_anime)

841


In [21]:
# getting a list of similar animes

similarity_score = list(enumerate(similarity[index_of_the_anime]))
print(similarity_score)

[(0, 0.0), (1, 0.1740028278667679), (2, 0.18726987082393406), (3, 0.0), (4, 0.18726987082393406), (5, 0.1783102193684844), (6, 0.6647906154780532), (7, 0.0), (8, 0.18726987082393406), (9, 0.18726987082393406), (10, 0.0), (11, 0.16288165249134992), (12, 0.18726987082393406), (13, 0.4270369857273739), (14, 0.1783102193684844), (15, 0.0), (16, 0.11994342697240229), (17, 0.0), (18, 0.0), (19, 0.4204781429152573), (20, 0.20278035280372714), (21, 0.4908498209810448), (22, 0.12707254853667996), (23, 0.43859454703471235), (24, 0.13361205404018112), (25, 0.04616093709501418), (26, 0.0440059608290384), (27, 0.0), (28, 0.0), (29, 0.14684900039087612), (30, 0.15465914535893782), (31, 0.0), (32, 0.20278035280372714), (33, 0.0), (34, 0.0), (35, 0.0), (36, 0.07728819061499642), (37, 0.07224719450773405), (38, 0.0), (39, 0.22561883229868904), (40, 0.0), (41, 0.07997133845762329), (42, 0.33873321184526656), (43, 0.1783102193684844), (44, 0.20278035280372714), (45, 0.06381910536092725), (46, 0.0), (47, 

In [22]:
len(similarity_score)

12294

In [25]:
# sorting the animes based on their similarity score

sorted_similar_anime = sorted(similarity_score, key = lambda x:x[1], reverse = True) 
print(sorted_similar_anime)

[(486, 1.0), (615, 1.0), (841, 1.0), (1103, 1.0), (1343, 1.0), (1472, 1.0), (1573, 1.0), (2458, 1.0), (2997, 1.0), (7628, 0.9806921231462878), (784, 0.9471851000728742), (1796, 0.9471851000728742), (2416, 0.9471851000728742), (4576, 0.9434515402047704), (206, 0.9368433976678295), (515, 0.9368433976678295), (588, 0.9368433976678295), (1409, 0.9368433976678295), (1930, 0.9368433976678295), (3203, 0.9368433976678295), (4275, 0.9368433976678295), (4314, 0.9368433976678295), (5999, 0.9368433976678295), (2342, 0.9365828373978186), (2852, 0.9365828373978186), (3443, 0.9365828373978186), (4498, 0.9365828373978186), (6163, 0.9365828373978186), (6763, 0.9365828373978186), (719, 0.935102204561982), (7867, 0.9221145750823206), (1227, 0.9165286172091266), (6970, 0.9105330847700722), (346, 0.904995644300767), (7080, 0.9042057004311208), (1184, 0.897798541502637), (182, 0.897075202412509), (271, 0.897075202412509), (1209, 0.8903390095385411), (2615, 0.8903390095385411), (3038, 0.8903390095385411), (4

In [26]:
# print the name of similar anime based on the index

print('anime suggested for you : \n')

i = 1

for anime in sorted_similar_anime:
  index = anime[0]
  title_from_index = anime_data[anime_data.index==index]['name'].values[0]
  if (i<30):
    print(i, '.',title_from_index)
    i+=1

anime suggested for you : 

1 . Boruto: Naruto the Movie
2 . Naruto: Shippuuden
3 . Naruto
4 . Boruto: Naruto the Movie - Naruto ga Hokage ni Natta Hi
5 . Naruto x UT
6 . Naruto: Shippuuden Movie 4 - The Lost Tower
7 . Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsugu Mono
8 . Naruto Shippuuden: Sunny Side Battle
9 . Naruto Soyokazeden Movie: Naruto to Mashin to Mitsu no Onegai Dattebayo!!
10 . Kyutai Panic Adventure!
11 . Naruto: Shippuuden Movie 6 - Road to Ninja
12 . Rekka no Honoo
13 . Naruto: Honoo no Chuunin Shiken! Naruto vs. Konohamaru!!
14 . Street Fighter Zero The Animation
15 . Dragon Ball Z
16 . Dragon Ball Kai (2014)
17 . Dragon Ball Kai
18 . Dragon Ball Z Movie 15: Fukkatsu no F
19 . Dragon Ball Super
20 . Dragon Ball Z: Summer Vacation Special
21 . Dragon Ball Z: Atsumare! Gokuu World
22 . Dragon Ball GT: Goku Gaiden! Yuuki no Akashi wa Suushinchuu
23 . Dragon Ball Z Movie 11: Super Senshi Gekiha!! Katsu no wa Ore da
24 . Kurokami The Animation
25 . Project ARMS
26 . Kuro

In [None]:
anime_name = input(' Enter your favourite anime name : ')

list_of_all_titles = anime_data['name'].tolist()

find_close_match = difflib.get_close_matches(anime_name, list_of_all_titles)

close_match = find_close_match[0]

index_of_the_anime = anime_data[anime_data.name == close_match]['index'].values[0]


similarity_score = list(enumerate(similarity[index_of_the_anime]))

sorted_similar_animes = sorted(similarity_score, key = lambda x:x[1], reverse = True) 

print('animes suggested for you : \n')

i = 1

for anime in sorted_similar_animes:
  index = anime[0]
  title_from_index = anime_data[anime_data.index==index]['name'].values[0]
  if (i<50):
    print(i, '.',title_from_index)
    i+=1