In [71]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [72]:
#loading the data file
anime_data = pd.read_csv('/content/animes.csv')

In [73]:
anime_data.head()

Unnamed: 0,index,uid,title,synopsis,genre,aired,episodes,members,popularity,ranked,score,img_url,link
0,0,28891,Haikyuu!! Second Season,Following their participation at the Inter-Hig...,"['Comedy', 'Sports', 'Drama', 'School', 'Shoun...","Oct 4, 2015 to Mar 27, 2016",25.0,489888,141,25.0,8.82,https://cdn.myanimelist.net/images/anime/9/766...,https://myanimelist.net/anime/28891/Haikyuu_Se...
1,1,23273,Shigatsu wa Kimi no Uso,Music accompanies the path of the human metron...,"['Drama', 'Music', 'Romance', 'School', 'Shoun...","Oct 10, 2014 to Mar 20, 2015",22.0,995473,28,24.0,8.83,https://cdn.myanimelist.net/images/anime/3/671...,https://myanimelist.net/anime/23273/Shigatsu_w...
2,2,34599,Made in Abyss,The Abyss—a gaping chasm stretching down into ...,"['Sci-Fi', 'Adventure', 'Mystery', 'Drama', 'F...","Jul 7, 2017 to Sep 29, 2017",13.0,581663,98,23.0,8.83,https://cdn.myanimelist.net/images/anime/6/867...,https://myanimelist.net/anime/34599/Made_in_Abyss
3,3,5114,Fullmetal Alchemist: Brotherhood,"""In order for something to be obtained, someth...","['Action', 'Military', 'Adventure', 'Comedy', ...","Apr 5, 2009 to Jul 4, 2010",64.0,1615084,4,1.0,9.23,https://cdn.myanimelist.net/images/anime/1223/...,https://myanimelist.net/anime/5114/Fullmetal_A...
4,4,31758,Kizumonogatari III: Reiketsu-hen,After helping revive the legendary vampire Kis...,"['Action', 'Mystery', 'Supernatural', 'Vampire']","Jan 6, 2017",1.0,214621,502,22.0,8.83,https://cdn.myanimelist.net/images/anime/3/815...,https://myanimelist.net/anime/31758/Kizumonoga...


In [74]:
anime_data.tail()

Unnamed: 0,index,uid,title,synopsis,genre,aired,episodes,members,popularity,ranked,score,img_url,link
19306,19306,32979,Flip Flappers,Cocona is an average middle schooler living wi...,"['Sci-Fi', 'Adventure', 'Comedy', 'Magic']","Oct 6, 2016 to Dec 29, 2016",13.0,134252,843,1070.0,7.73,https://cdn.myanimelist.net/images/anime/4/822...,https://myanimelist.net/anime/32979/Flip_Flappers
19307,19307,123,Fushigi Yuugi,"While visiting the National Library, junior-hi...","['Adventure', 'Fantasy', 'Magic', 'Martial Art...","Apr 6, 1995 to Mar 28, 1996",52.0,84407,1292,1071.0,7.73,https://cdn.myanimelist.net/images/anime/2/201...,https://myanimelist.net/anime/123/Fushigi_Yuugi
19308,19308,1281,Gakkou no Kaidan,"Years ago, all of the ghosts in a haunted scho...","['Mystery', 'Horror', 'Supernatural']","Oct 22, 2000 to Mar 25, 2001",19.0,83093,1314,1073.0,7.73,https://cdn.myanimelist.net/images/anime/9/183...,https://myanimelist.net/anime/1281/Gakkou_no_K...
19309,19309,450,InuYasha Movie 2: Kagami no Naka no Mugenjo,Inuyasha and company have finally destroyed Na...,"['Action', 'Adventure', 'Comedy', 'Historical'...","Dec 21, 2002",1.0,71989,1469,1077.0,7.73,https://cdn.myanimelist.net/images/anime/1162/...,https://myanimelist.net/anime/450/InuYasha_Mov...
19310,19310,87,Mobile Suit Gundam: Char's Counterattack,The year is Universal Century 0093. Char Aznab...,"['Military', 'Sci-Fi', 'Space', 'Drama', 'Mecha']","Mar 12, 1988",1.0,29248,2604,1081.0,7.73,https://cdn.myanimelist.net/images/anime/1523/...,https://myanimelist.net/anime/87/Mobile_Suit_G...


In [75]:
anime_data.shape

(19311, 13)

In [76]:
anime_data.isnull().sum()

index            0
uid              0
title            0
synopsis       975
genre            0
aired            0
episodes       706
members          0
popularity       0
ranked        3212
score          579
img_url        180
link             0
dtype: int64

In [77]:
# selecting features for recommendation

features = ['genre','synopsis']
print(features)

['genre', 'synopsis']


In [78]:
# replacing null values

for i in features:
  anime_data[i] = anime_data[i].fillna('')

In [79]:
# combining all selected features

com_features = anime_data['genre']+' '+anime_data['synopsis']
print(com_features)

0        ['Comedy', 'Sports', 'Drama', 'School', 'Shoun...
1        ['Drama', 'Music', 'Romance', 'School', 'Shoun...
2        ['Sci-Fi', 'Adventure', 'Mystery', 'Drama', 'F...
3        ['Action', 'Military', 'Adventure', 'Comedy', ...
4        ['Action', 'Mystery', 'Supernatural', 'Vampire...
                               ...                        
19306    ['Sci-Fi', 'Adventure', 'Comedy', 'Magic'] Coc...
19307    ['Adventure', 'Fantasy', 'Magic', 'Martial Art...
19308    ['Mystery', 'Horror', 'Supernatural'] Years ag...
19309    ['Action', 'Adventure', 'Comedy', 'Historical'...
19310    ['Military', 'Sci-Fi', 'Space', 'Drama', 'Mech...
Length: 19311, dtype: object


In [80]:
# converting text data to feature vectors

vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(com_features)
print(feature_vectors)

  (0, 32276)	0.039309439109386175
  (0, 23178)	0.03806908426558327
  (0, 42829)	0.038908076955171074
  (0, 27467)	0.1157874864384546
  (0, 39536)	0.09708211300091499
  (0, 34469)	0.12095981442801869
  (0, 6822)	0.06144046098581763
  (0, 42734)	0.03831161116044872
  (0, 18820)	0.049068064715380555
  (0, 19310)	0.11811327692559734
  (0, 2371)	0.0961367523990891
  (0, 2632)	0.12462963953918756
  (0, 17875)	0.0674661273175296
  (0, 27560)	0.04890973695066112
  (0, 27820)	0.08799697102141686
  (0, 13201)	0.09037099041451127
  (0, 28259)	0.07645351762767372
  (0, 17453)	0.05287134749725019
  (0, 11873)	0.05879357920903148
  (0, 38927)	0.0514371711369414
  (0, 15762)	0.09568910891011555
  (0, 39840)	0.06741808006513801
  (0, 9579)	0.08662889015006461
  (0, 34472)	0.09037099041451127
  (0, 22043)	0.061961208001096245
  :	:
  (19310, 40444)	0.039477836137844884
  (19310, 18462)	0.06898743539888609
  (19310, 16641)	0.052522628304079125
  (19310, 32276)	0.03633878214756342
  (19310, 23178)	0.0351

In [107]:
# getting similarity scores using cosine similarity

similarity = cosine_similarity(feature_vectors)
print(similarity)

[[1.         0.07954009 0.11449119 ... 0.14214565 0.0969189  0.08929167]
 [0.07954009 1.         0.1168764  ... 0.09578076 0.08570174 0.08200859]
 [0.11449119 0.1168764  1.         ... 0.14574187 0.12580741 0.14129884]
 ...
 [0.14214565 0.09578076 0.14574187 ... 1.         0.10749677 0.08685299]
 [0.0969189  0.08570174 0.12580741 ... 0.10749677 1.         0.09338607]
 [0.08929167 0.08200859 0.14129884 ... 0.08685299 0.09338607 1.        ]]


In [108]:
print(similarity.shape)

(19311, 19311)


In [109]:
# input anime title

anime_name = input(' Enter your favourite anime name : ')

 Enter your favourite anime name : Haikyuu


In [110]:
# creating a list of anime titles from data

list_of_all_titles = anime_data['title'].tolist()
print(list_of_all_titles)

['Haikyuu!! Second Season', 'Shigatsu wa Kimi no Uso', 'Made in Abyss', 'Fullmetal Alchemist: Brotherhood', 'Kizumonogatari III: Reiketsu-hen', 'Mob Psycho 100 II', 'Sen to Chihiro no Kamikakushi', 'Kimetsu no Yaiba', 'Owarimonogatari 2nd Season', 'Code Geass: Hangyaku no Lelouch R2', 'Haikyuu!!: Karasuno Koukou vs. Shiratorizawa Gakuen Koukou', 'Gintama.', 'Gintama Movie 2: Kanketsu-hen - Yorozuya yo Eien Nare', 'Gintama', 'Clannad: After Story', "Gintama': Enchousen", 'One Punch Man', 'Kaguya-hime no Monogatari', 'Koukaku Kidoutai 2.0', 'Nodame Cantabile: Finale - Mine to Kiyora no Saikai', 'Saraiya Goyou', 'Saint Seiya: Meiou Hades Meikai-hen', 'Noragami OVA', 'Lupin III: Part II', 'Kobayashi-san Chi no Maid Dragon: Valentine, Soshite Onsen! - Amari Kitai Shinaide Kudasai', 'Kuroko no Basket 2nd Season NG-shuu', 'K-On!: Live House!', 'K-On!', 'InuYasha Movie 3: Tenka Hadou no Ken', 'Hidamari Sketch x ☆☆☆ Specials', 'Haikyuu!! Movie 2: Shousha to Haisha', 'Gochuumon wa Usagi Desu ka?

In [111]:
# finding close match for input anime title

find_close_match = difflib.get_close_matches(anime_name, list_of_all_titles)
print(find_close_match)
close_match = find_close_match[0]
print(close_match)

['Haikyuu!!', 'Haikyuu!!', 'Kikyuu']
Haikyuu!!


In [112]:
# finding index of anime with title

anime_index = anime_data[anime_data.title == close_match]['index'].values[0]
print(anime_index)

735


In [114]:
print(anime_data[anime_data['index'] == 735].title)

735    Haikyuu!!
Name: title, dtype: object


In [115]:
# getting a list of similar anime

similarity_score = list(enumerate(similarity[anime_index]))
print(similarity_score)

[(0, 0.37658517678811027), (1, 0.1063698096118156), (2, 0.12477764439641896), (3, 0.11731986847531144), (4, 0.0998490338384418), (5, 0.1359958613363883), (6, 0.09532908539373566), (7, 0.11894106060540556), (8, 0.11134918904289269), (9, 0.12816216233021102), (10, 0.2519030223981891), (11, 0.08018361063607968), (12, 0.14133573956580925), (13, 0.08764228432617928), (14, 0.1371306590124196), (15, 0.0880892005715845), (16, 0.134380691433981), (17, 0.12241463337312428), (18, 0.06194921522505174), (19, 0.0997532170566039), (20, 0.0721108838257407), (21, 0.07781866235581639), (22, 0.05794509633293485), (23, 0.055941307150484995), (24, 0.02684745048708392), (25, 0.06110474767770234), (26, 0.08755802490880829), (27, 0.12917737401699347), (28, 0.058276156363241405), (29, 0.08196111652633384), (30, 0.14478273500907177), (31, 0.028545427864244535), (32, 0.09881727297256827), (33, 0.057739187128298994), (34, 0.10102286681235455), (35, 0.08847938666010854), (36, 0.1082329825614388), (37, 0.1049660108

In [116]:
len(similarity_score)

19311

In [117]:
# sorting anime based on their similarity score

similar_anime = sorted(similarity_score, key = lambda x:x[1], reverse = True) 
print(similar_anime)
sorted_anime = similar_anime[1:]
print(sorted_anime)

[(735, 0.9999999999999996), (17756, 0.9999999999999996), (0, 0.37658517678811027), (3077, 0.37658517678811027), (11381, 0.3312405112967269), (16527, 0.29479011810637035), (3744, 0.2899411406209833), (2654, 0.2868031319301265), (16802, 0.2868031319301265), (10150, 0.2819746352629005), (7571, 0.27270431126793093), (3355, 0.2590783506595884), (14867, 0.2570806678025163), (10, 0.2519030223981891), (3087, 0.2519030223981891), (5831, 0.23386829997044514), (5522, 0.23104915380521923), (185, 0.22765029413126958), (5450, 0.22765029413126958), (447, 0.2252930759821376), (7816, 0.2252930759821376), (2714, 0.2240533357662911), (3070, 0.2240533357662911), (1765, 0.22158604749401276), (14464, 0.22158604749401276), (16179, 0.2215333121420426), (7472, 0.21718740507179982), (6959, 0.21252878021277913), (18634, 0.20918441124621615), (11794, 0.20839032452170744), (16804, 0.20754974427513254), (16496, 0.20718661597499863), (6578, 0.2056960551673792), (11326, 0.203084043493679), (5553, 0.20193600963906386)

In [118]:
# recommending similar anime based on index

print('Anime suggested for you : \n')

i = 1

for anime in sorted_anime:
  index = anime[0]
  title_from_index = anime_data[anime_data.index==index]['title'].values[0]
  if (i<16):
    print(i, '.',title_from_index)
    i+=1

Anime suggested for you : 

1 . Haikyuu!!
2 . Haikyuu!! Second Season
3 . Haikyuu!! Second Season
4 . Ashita e Attack!
5 . Attack No.1 (1970)
6 . Haikyuu!!: vs. "Akaten"
7 . Court no Naka no Tenshi-tachi
8 . Court no Naka no Tenshi-tachi
9 . Attacker You!
10 . Attack No.1
11 . Shoujo Fight: Norainu-tachi no Odekake
12 . 2.43: Seiin Koukou Danshi Volley-bu
13 . Haikyuu!!: Karasuno Koukou vs. Shiratorizawa Gakuen Koukou
14 . Haikyuu!!: Karasuno Koukou vs. Shiratorizawa Gakuen Koukou
15 . Moshidora
