# Importing relevant libraries

In [2]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import difflib

# Loading the data into a dataframe

In [3]:
# Read the anime catalogue from the CSV file in the working directory.
raw_data = pd.read_csv('Anime.csv')

In [4]:
# Work on an independent copy so the frame as loaded stays available in raw_data.
data = raw_data.copy(deep=True)

In [5]:
# Preview the first five rows of the working frame.
data.head(5)

Unnamed: 0,ID,Title,Synonyms,Japanese,English,Synopsis,Type,Episodes,Status,Start_Aired,...,Themes,Demographics,Duration_Minutes,Rating,Score,Scored_Users,Ranked,Popularity,Members,Favorites
0,16498,Shingeki no Kyojin,"AoT, SnK",進撃の巨人,Attack on Titan,"Centuries ago, mankind was slaughtered to near...",TV,25.0,Finished Airing,"Apr 7, 2013",...,"Gore, Military, Survival",Shounen,24.0,R - 17+ (violence & profanity),8.531,519803.0,1002.0,1,3524109,155695
1,1535,Death Note,DN,デスノート,Death Note,"Brutal murders, petty thefts, and senseless vi...",TV,37.0,Finished Airing,"Oct 4, 2006",...,Psychological,Shounen,23.0,R - 17+ (violence & profanity),8.621,485487.0,732.0,2,3504535,159701
2,5114,Fullmetal Alchemist: Brotherhood,"Hagane no Renkinjutsushi Fullmetal Alchemist, ...",鋼の錬金術師 FULLMETAL ALCHEMIST,Fullmetal Alchemist Brotherhood,After a horrific alchemy experiment goes wrong...,TV,64.0,Finished Airing,"Apr 5, 2009",...,Military,Shounen,24.0,R - 17+ (violence & profanity),9.131,900398.0,12.0,3,2978455,207772
3,30276,One Punch Man,"One Punch-Man, One-Punch Man, OPM",ワンパンマン,One Punch Man,The seemingly unimpressive Saitama has a rathe...,TV,12.0,Finished Airing,"Oct 5, 2015",...,"Parody, Super Power",Seinen,24.0,R - 17+ (violence & profanity),8.511,19066.0,1112.0,4,2879907,59651
4,11757,Sword Art Online,"S.A.O, SAO",ソードアート・オンライン,Sword Art Online,Ever since the release of the innovative Nerve...,TV,25.0,Finished Airing,"Jul 8, 2012",...,"Love Polygon, Video Game",Unknown,23.0,PG-13 - Teens 13 or older,7.201,990254.0,29562.0,5,2813565,64997


# Dealing with missing values

In [6]:
# Count the missing values in each column before any cleaning.
data.isna().sum()

ID                     0
Title                  0
Synonyms               0
Japanese               0
English                0
Synopsis               0
Type                   0
Episodes             547
Status                 0
Start_Aired            0
End_Aired              0
Premiered              0
Broadcast              0
Producers              0
Licensors              0
Studios                0
Source                 0
Genres                 0
Themes                 0
Demographics           0
Duration_Minutes     599
Rating                 0
Score               6898
Scored_Users        6898
Ranked              1924
Popularity             0
Members                0
Favorites              0
dtype: int64

In [7]:
# Replace every missing value with an empty string so that no column
# contributes NaN when the text features are concatenated later.
# NOTE(review): the null report shows gaps only in Episodes, Duration_Minutes,
# Score, Scored_Users and Ranked, so this presumably turns those numeric
# columns into mixed object columns - confirm before using them numerically.
data = data.fillna(value='')

In [8]:
# Re-count the missing values to confirm the fill left none behind.
data.isna().sum()

ID                  0
Title               0
Synonyms            0
Japanese            0
English             0
Synopsis            0
Type                0
Episodes            0
Status              0
Start_Aired         0
End_Aired           0
Premiered           0
Broadcast           0
Producers           0
Licensors           0
Studios             0
Source              0
Genres              0
Themes              0
Demographics        0
Duration_Minutes    0
Rating              0
Score               0
Scored_Users        0
Ranked              0
Popularity          0
Members             0
Favorites           0
dtype: int64

# Feature selection

In [9]:
# Columns whose text is joined, in this order and separated by single spaces,
# into one document per anime.
text_columns = ['Producers', 'Genres', 'Studios', 'Themes', 'Synopsis',
                'Rating', 'Demographics']

features = data[text_columns[0]]
for column in text_columns[1:]:
    features = features + ' ' + data[column]

# Converting from text to feature vectors

In [10]:
# Fit a TF-IDF vectorizer with default settings on the combined text and
# transform that same text into a feature matrix (one row per anime).
vectorizer = TfidfVectorizer()
vectorized_features = vectorizer.fit_transform(raw_documents=features)

# Show the (row, column) -> weight entries of the resulting matrix.
print(vectorized_features)

  (0, 39971)	0.03688175265943134
  (0, 34312)	0.042106356650370234
  (0, 47357)	0.04176952361542334
  (0, 147)	0.04150561514296596
  (0, 36575)	0.0326958143982989
  (0, 26235)	0.03197364015582867
  (0, 48546)	0.03239502287933135
  (0, 5626)	0.1146479973694208
  (0, 2921)	0.030903750696804514
  (0, 24789)	0.05383622770376747
  (0, 4499)	0.04508062279193405
  (0, 44175)	0.03736726185049059
  (0, 10069)	0.07516059354464982
  (0, 47897)	0.04352773517984471
  (0, 10906)	0.05757411711564978
  (0, 34997)	0.0582982782803653
  (0, 47739)	0.04874820852559029
  (0, 5819)	0.07671916447214915
  (0, 21678)	0.0565234027151994
  (0, 3003)	0.10239669959398316
  (0, 3023)	0.10239669959398316
  (0, 14957)	0.04633571150237689
  (0, 7190)	0.055591132775873436
  (0, 2276)	0.03519825659974376
  (0, 1054)	0.10442987882036164
  :	:
  (21455, 1272)	0.4018452809558012
  (21455, 1913)	0.2717425933578838
  (21456, 23061)	0.37062132376767065
  (21456, 1429)	0.30878929594238136
  (21456, 46421)	0.6243147278974741
  

# Getting Similarity

In [11]:
# Pairwise cosine similarity between every pair of anime feature vectors.
# Row i / column j holds the similarity between anime i and anime j.
# NOTE(review): the result is printed below as a dense square array; with
# roughly 21k titles it is presumably several GB - confirm memory headroom.
similar = cosine_similarity(X=vectorized_features)

In [12]:
# Show the similarity matrix; the printed form abbreviates the middle rows and columns.
print(similar)

[[1.         0.13705108 0.13457552 ... 0.         0.         0.        ]
 [0.13705108 1.         0.10089336 ... 0.00538849 0.00562633 0.0059111 ]
 [0.13457552 0.10089336 1.         ... 0.02013036 0.02101887 0.01174247]
 ...
 [0.         0.00538849 0.02013036 ... 1.         0.95772768 0.64738852]
 [0.         0.00562633 0.02101887 ... 0.95772768 1.         0.67596304]
 [0.         0.0059111  0.01174247 ... 0.64738852 0.67596304 1.        ]]


# Anime titles as a list

In [70]:
# Plain Python list of all titles, in row order; used for fuzzy matching.
anime_title = data['Title'].to_list()
print(anime_title)

['Shingeki no Kyojin', 'Death Note', 'Fullmetal Alchemist: Brotherhood', 'One Punch Man', 'Sword Art Online', 'Boku no Hero Academia', 'Naruto', 'Tokyo Ghoul', 'Kimetsu no Yaiba', 'Hunter x Hunter (2011)', 'Kimi no Na wa.', 'Shingeki no Kyojin Season 2', 'Steins;Gate', 'Boku no Hero Academia 2nd Season', 'No Game No Life', 'Naruto: Shippuuden', 'Code Geass: Hangyaku no Lelouch', 'Shingeki no Kyojin Season 3', 'Koe no Katachi', 'Toradora!', 'Shigatsu wa Kimi no Uso', 'Jujutsu Kaisen', 'Noragami', 'Re:Zero kara Hajimeru Isekai Seikatsu', 'One Piece', 'Boku no Hero Academia 3rd Season', 'Angel Beats!', 'Shingeki no Kyojin Season 3 Part 2', 'Akame ga Kill!', 'Boku dake ga Inai Machi', 'Nanatsu no Taizai', 'Mirai Nikki (TV)', 'Sword Art Online II', 'Mob Psycho 100', 'Ansatsu Kyoushitsu', 'Ao no Exorcist', 'Haikyuu!!', 'Kono Subarashii Sekai ni Shukufuku wo!', 'Yakusoku no Neverland', 'Bleach', 'Kiseijuu: Sei no Kakuritsu', 'Shingeki no Kyojin: The Final Season', 'Cowboy Bebop', 'Sen to Chih

# Get anime title from user

In [95]:
# Ask the user for the title to base the recommendations on.
anime = input('Kindly enter an anime title: ')

# Find close match

In [96]:
# Fuzzy-match the typed text against the known titles. n and cutoff are
# spelled out at difflib's defaults: at most 3 matches, similarity >= 0.6,
# best match first.
close_match = difflib.get_close_matches(anime, anime_title, n=3, cutoff=0.6)
print(close_match)

['Jujutsu Kaisen', 'Jujutsu Kaisen 0 Movie', 'Juuni Taisen']


In [97]:
# difflib returns an empty list when nothing is similar enough to the typed
# text; fail with a clear message instead of a bare IndexError.
if not close_match:
    raise ValueError('No anime title close to the entered text was found; '
                     'please try a different spelling.')

# The first entry is the best-scoring match.
match = close_match[0]

# Replacing the ID column with a zero-based row index

In [98]:
# Overwrite the ID column with each row's zero-based position so that an ID
# can be used directly as a row number in the similarity matrix.
# The length comes from the frame itself: a hard-coded row count raises a
# length-mismatch error as soon as the CSV has a different number of rows.
data['ID'] = range(len(data))

# Getting the ID of the anime

In [99]:
# Look up the ID of the first row whose title equals the matched title.
title_mask = data['Title'] == match
anime_ID = data.loc[title_mask, 'ID'].iloc[0]
print(anime_ID)

21


# Similarity score of every anime, paired with its row index

In [100]:
# Pair every row number with its similarity to the selected anime, taken
# from the selected anime's row of the similarity matrix.
score_of_similarity = [
    (row_number, score)
    for row_number, score in enumerate(similar[anime_ID])
]
print(score_of_similarity)

[(0, 0.10545372796735963), (1, 0.11671630167724298), (2, 0.08209725932587492), (3, 0.09036611430234426), (4, 0.10920338749453133), (5, 0.12418897696116421), (6, 0.08712980046372781), (7, 0.1313017176104515), (8, 0.12669306696844737), (9, 0.1036996293053156), (10, 0.09129252902392651), (11, 0.07983503857613775), (12, 0.1122337924169617), (13, 0.10299368606532146), (14, 0.08284558791172075), (15, 0.11604731425456952), (16, 0.11441801879171139), (17, 0.09851902854841191), (18, 0.10736666979661721), (19, 0.07923444280477127), (20, 0.11951758604473134), (21, 1.0000000000000002), (22, 0.09224959485752864), (23, 0.11006028588196608), (24, 0.11527442868966636), (25, 0.10634652156205625), (26, 0.1261403179158393), (27, 0.1131398338105368), (28, 0.09280745327769166), (29, 0.12928077625275497), (30, 0.09918318048407981), (31, 0.08864975536282604), (32, 0.09750770874241735), (33, 0.09001407100746625), (34, 0.11469415237919306), (35, 0.15092003100227966), (36, 0.14734071096443602), (37, 0.106004053

# Ranking the anime based on their similarity score

In [101]:
# Order the (row number, score) pairs from most to least similar. The sort
# is stable, so pairs with equal scores keep their original relative order.
rank = list(score_of_similarity)
rank.sort(key=lambda pair: pair[1], reverse=True)
print(rank)

[(21, 1.0000000000000002), (2827, 0.3910130873037641), (2441, 0.24254980130651163), (949, 0.242031185328844), (504, 0.23409517465914115), (329, 0.22720008512244935), (1534, 0.2253725060020067), (4039, 0.2037537672126455), (12147, 0.20320768850063597), (1955, 0.20094471613447493), (683, 0.19722970751065533), (360, 0.19301847994080484), (2806, 0.18350879984476165), (7656, 0.1773765713026118), (387, 0.16777277897760465), (403, 0.1669781273133434), (590, 0.1668440666263087), (1030, 0.16561571356268281), (269, 0.16450366349745915), (482, 0.16369690364274733), (3691, 0.1635909611133319), (183, 0.16332947340187973), (1905, 0.16166058602112854), (1141, 0.16131459726496972), (816, 0.16104287716677368), (339, 0.16093206121994202), (11986, 0.16069697543554612), (1406, 0.16062681108952195), (4978, 0.15815208304164144), (1670, 0.15712063925956732), (4871, 0.15664003718596647), (9673, 0.15537705609320346), (102, 0.15469116940644562), (296, 0.15417659316763052), (860, 0.15307598410395815), (12656, 0.

# Getting the recommendations

In [113]:

print('Anime Recommendations: ')

# Only the first 49 ranked entries are ever printed, so slice the ranking
# instead of walking (and doing a full-frame lookup for) every row.
# The top entry is normally the queried title itself, since each anime has
# similarity 1 with itself.
for i, (index, _score) in enumerate(rank[:49], start=1):
    # `index` is a row position in the similarity matrix, so fetch the title
    # by position rather than by index label.
    title = data['Title'].iloc[index]
    print(i, '.', title)
        

Anime Recommendations: 
1 . Jujutsu Kaisen
2 . Jujutsu Kaisen Official PV
3 . Shakugan no Shana Movie
4 . Grisaia no Meikyuu: Caprice no Mayu 0
5 . Grisaia no Rakuen
6 . Grisaia no Kajitsu
7 . Tensei Kenja no Isekai Life: Dai-2 no Shokugyou wo Ete, Sekai Saikyou ni Narimashita
8 . Master Piece The Animation
9 . Choujin Sentai Baratack
10 . Jujutsu Kaisen 2nd Season
11 . Shakugan no Shana II (Second)
12 . Shakugan no Shana
13 . Tonagura!
14 . Tori no Uta
15 . Ichiban Ushiro no Daimaou
16 . Jujutsu Kaisen 0 Movie
17 . Platinum End
18 . Rewrite
19 . Maou Gakuin no Futekigousha: Shijou Saikyou no Maou no Shiso, Tensei shite Shison-tachi no Gakkou e Kayou
20 . Kekkai Sensen & Beyond
21 . Ayakashi
22 . Kekkai Sensen
23 . Aura: Maryuuin Kouga Saigo no Tatakai
24 . Gyakkyou Burai Kaiji: Hakairoku-hen
25 . Persona 4 the Animation
26 . Ajin
27 . Play Ball
28 . Garo: Honoo no Kokuin
29 . Yao Shen Ji
30 . Makai Ouji: Devils and Realist
31 . W: Wish
32 . Helck
33 . Mononoke Hime
34 . Baka to Test t

# Summary

In [118]:
# End-to-end recommendation: read a title, fuzzy-match it against the known
# titles, rank every anime by cosine similarity to the match, and print the
# 49 most similar titles.
anime = input('Kindly enter an anime title: ')
anime_title = data['Title'].tolist()
close_match = difflib.get_close_matches(anime, anime_title)

# difflib returns an empty list when nothing is similar enough; fail with a
# clear message instead of a bare IndexError.
if not close_match:
    raise ValueError('No anime title close to the entered text was found; '
                     'please try a different spelling.')
match = close_match[0]

# Row position of the first anime carrying the matched title. Taking it from
# the title list keeps this cell independent of whether the ID column has
# already been overwritten with row numbers by an earlier cell.
anime_ID = anime_title.index(match)
score_of_similarity = list(enumerate(similar[anime_ID]))
rank = sorted(score_of_similarity, key=lambda x: x[1], reverse=True)

print('Anime Recommendations: ')
# Only the first 49 ranked entries are printed, so slice the ranking instead
# of iterating over every row. The loop variables are named so that the
# user's input in `anime` is not overwritten.
for i, (index, _score) in enumerate(rank[:49], start=1):
    # `index` is a row position in the similarity matrix; look up by position.
    title = data['Title'].iloc[index]
    print(i, '.', title)

Anime Recommendations: 
1 . Jujutsu Kaisen
2 . Jujutsu Kaisen Official PV
3 . Shakugan no Shana Movie
4 . Grisaia no Meikyuu: Caprice no Mayu 0
5 . Grisaia no Rakuen
6 . Grisaia no Kajitsu
7 . Tensei Kenja no Isekai Life: Dai-2 no Shokugyou wo Ete, Sekai Saikyou ni Narimashita
8 . Master Piece The Animation
9 . Choujin Sentai Baratack
10 . Jujutsu Kaisen 2nd Season
11 . Shakugan no Shana II (Second)
12 . Shakugan no Shana
13 . Tonagura!
14 . Tori no Uta
15 . Ichiban Ushiro no Daimaou
16 . Jujutsu Kaisen 0 Movie
17 . Platinum End
18 . Rewrite
19 . Maou Gakuin no Futekigousha: Shijou Saikyou no Maou no Shiso, Tensei shite Shison-tachi no Gakkou e Kayou
20 . Kekkai Sensen & Beyond
21 . Ayakashi
22 . Kekkai Sensen
23 . Aura: Maryuuin Kouga Saigo no Tatakai
24 . Gyakkyou Burai Kaiji: Hakairoku-hen
25 . Persona 4 the Animation
26 . Ajin
27 . Play Ball
28 . Garo: Honoo no Kokuin
29 . Yao Shen Ji
30 . Makai Ouji: Devils and Realist
31 . W: Wish
32 . Helck
33 . Mononoke Hime
34 . Baka to Test t