In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings

In [2]:
warnings.filterwarnings("ignore")

In [3]:
df=pd.read_csv('animes.csv').loc[:,['uid','title','members','synopsis']].rename(columns={'uid':'anime_id'})
df['title']=df['title'].str.lower()
df

Unnamed: 0,anime_id,title,members,synopsis
0,28891,haikyuu!! second season,489888,Following their participation at the Inter-Hig...
1,23273,shigatsu wa kimi no uso,995473,Music accompanies the path of the human metron...
2,34599,made in abyss,581663,The Abyss—a gaping chasm stretching down into ...
3,5114,fullmetal alchemist: brotherhood,1615084,"""In order for something to be obtained, someth..."
4,31758,kizumonogatari iii: reiketsu-hen,214621,After helping revive the legendary vampire Kis...
...,...,...,...,...
19306,32979,flip flappers,134252,Cocona is an average middle schooler living wi...
19307,123,fushigi yuugi,84407,"While visiting the National Library, junior-hi..."
19308,1281,gakkou no kaidan,83093,"Years ago, all of the ghosts in a haunted scho..."
19309,450,inuyasha movie 2: kagami no naka no mugenjo,71989,Inuyasha and company have finally destroyed Na...


In [4]:
df['synopsis']=df['synopsis'].str.replace('\r','')
df['synopsis']=df['synopsis'].str.replace('\n','')
df['synopsis']=df['synopsis'].fillna('')

for i in range(0,19311):
    df['synopsis'][i] = df['synopsis'][i].split("  [")[0]
    
df

Unnamed: 0,anime_id,title,members,synopsis
0,28891,haikyuu!! second season,489888,Following their participation at the Inter-Hig...
1,23273,shigatsu wa kimi no uso,995473,Music accompanies the path of the human metron...
2,34599,made in abyss,581663,The Abyss—a gaping chasm stretching down into ...
3,5114,fullmetal alchemist: brotherhood,1615084,"""In order for something to be obtained, someth..."
4,31758,kizumonogatari iii: reiketsu-hen,214621,After helping revive the legendary vampire Kis...
...,...,...,...,...
19306,32979,flip flappers,134252,Cocona is an average middle schooler living wi...
19307,123,fushigi yuugi,84407,"While visiting the National Library, junior-hi..."
19308,1281,gakkou no kaidan,83093,"Years ago, all of the ghosts in a haunted scho..."
19309,450,inuyasha movie 2: kagami no naka no mugenjo,71989,Inuyasha and company have finally destroyed Na...


In [5]:
df[df['title']=='dragon ball']

Unnamed: 0,anime_id,title,members,synopsis
377,223,dragon ball,565538,Gokuu Son is a young boy who lives in the wood...
6643,223,dragon ball,565538,Gokuu Son is a young boy who lives in the wood...


In [6]:
df.to_csv('anime_dataset.csv')

In [7]:
df = pd.read_csv('anime_dataset.csv').iloc[:5000, 1:]
df = df.sort_values(by="anime_id").drop_duplicates(subset=["title"])

In [8]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=5000,stop_words='english')

In [9]:
tv = cv.fit_transform(df['synopsis'].astype('U')).toarray()

In [10]:
tv.shape

(4835, 5000)

In [11]:
cv.get_feature_names()

['00',
 '000',
 '01',
 '10',
 '100',
 '10th',
 '11',
 '12',
 '12th',
 '13',
 '13th',
 '14',
 '15',
 '15th',
 '16',
 '16th',
 '17',
 '18',
 '19',
 '1980s',
 '1998',
 '1999',
 '19th',
 '1st',
 '20',
 '2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '2011',
 '2015',
 '2017',
 '2018',
 '2020',
 '20th',
 '21',
 '21st',
 '22',
 '22nd',
 '23',
 '24',
 '24th',
 '25',
 '26',
 '27',
 '28',
 '29',
 '2nd',
 '30',
 '300',
 '31',
 '3d',
 '3rd',
 '4th',
 '50',
 '500',
 '5th',
 '6th',
 '7th',
 '8th',
 '90',
 '9th',
 'abandoned',
 'abducted',
 'abh',
 'abilities',
 'ability',
 'able',
 'abnormal',
 'aboard',
 'abroad',
 'abruptly',
 'absence',
 'absent',
 'absolute',
 'absolutely',
 'abuse',
 'abusive',
 'abyss',
 'academic',
 'academy',
 'accept',
 'acceptance',
 'accepted',
 'accepting',
 'accepts',
 'access',
 'accident',
 'accidentally',
 'acclaimed',
 'accompanied',
 'accompanying',
 'accomplish',
 'according',
 'account',
 'accused',
 'accustomed',
 'ace',
 'achieve',
 'achieving',
 'acquain

In [12]:
from sklearn.metrics.pairwise import cosine_similarity
cos_sim = cosine_similarity(tv)
cos_sim

array([[1.        , 0.17770466, 0.04173112, ..., 0.        , 0.03800209,
        0.        ],
       [0.17770466, 1.        , 0.09393364, ..., 0.        , 0.0285133 ,
        0.        ],
       [0.04173112, 0.09393364, 1.        , ..., 0.        , 0.01339179,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.03800209, 0.0285133 , 0.01339179, ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [13]:
def recommender(anime):
    index = df[df['title'] == anime].index[0]
    distances = sorted(list(enumerate(cos_sim[index])),reverse=True,key = lambda x: x[1])
    for i in distances[1:10]:
        print(df.iloc[i[0]].title)

In [14]:
recommender('made in abyss')

free!: eternal summer - kindan no all hard!
bang dream!: asonjatta!
shin strange+ special
steins;gate 0: kesshou takei no valentine - bittersweet intermedio
strange+ special
monhun nikki girigiri airou mura: airou kiki ippatsu specials
choboraunyopomi gekijou dai ni maku ai mai mii: mousou catastrophe special
angel beats! specials
gakuen handsome special


In [15]:
recommender('dragon ball')

suki da yo!
matou kitan zankan!
letchu, getchu, saru getchu
ryuu seiki
life no color
ishuzoku reviewers
sakura no mori
toufu kozou
maki-chan to now.


In [16]:
recommender('death note')

doubutsu no mori
koihime
mayoiga
mahoujin guruguru
maid ane
utawarerumono
robin-kun to 100 nin no otomodachi
kaitei daisensou: ai no 20,000 miles
karakara-sama no himitsu


In [17]:
recommender('one piece')

kisaku spirit
shusaku liberty
yaruki manman
tsuma to mama to boin
jii tousaku
hatsu inu 2 the animation: strange kind of woman - again
kyouiku shidou the animation
jinkou shoujo: henshin sex android
bikyaku seido kaichou ai
