In [1]:
import pandas as pd # For data processing

In [2]:
# Read the anime dataset CSV; the path is relative to the notebook's working directory.
animes = pd.read_csv(filepath_or_buffer="anime-dataset-2023.csv")

In [3]:
# Show the column labels of the loaded frame to see which fields are available.
animes.columns

Index(['anime_id', 'Name', 'English name', 'Other name', 'Score', 'Genres',
       'Synopsis', 'Type', 'Episodes', 'Aired', 'Premiered', 'Status',
       'Producers', 'Licensors', 'Studios', 'Source', 'Duration', 'Rating',
       'Rank', 'Popularity', 'Favorites', 'Scored By', 'Members', 'Image URL'],
      dtype='object')

In [4]:
# Keep only a subset of the columns; every other column is discarded from `animes`.
animes = animes.loc[
    :,
    [
        'anime_id',
        'Name',
        'English name',
        'Score',
        'Genres',
        'Synopsis',
        'Type',
        'Studios',
        'Source',
    ],
]

In [5]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Bag-of-words vectorizer: the vocabulary is capped at 5000 terms and English
# stop words are dropped. The count vectors it produces are what gets compared
# with cosine similarity later on.
cv = CountVectorizer(
    stop_words='english',
    max_features=5000,
)

In [6]:
# Working copy of the catalogue without the 'Score' column.
new = animes.drop(columns=['Score'])

# Build one free-text "tags" string per anime from its name, genres and studios;
# this is the text the CountVectorizer is fitted on.
# String concatenation propagates NaN: a single missing value in any of the three
# columns would turn the whole tag into NaN, which is not a valid text document
# for the vectorizer. Missing values are therefore replaced with '' first; rows
# without missing values produce exactly the same tag as before.
new['tags'] = (
    new['Name'].fillna('')
    + ' ' + new['Genres'].fillna('')
    + ' ' + new['Studios'].fillna('')
)

# NOTE(review): the data uses the literal placeholder "UNKNOWN" for some
# genres/studios (see the last rows of the output). It is presumably tokenised
# like any other word, so rows sharing it look more similar to each other --
# confirm whether it should be stripped before vectorizing.
new['tags']

0        Cowboy Bebop Action, Award Winning, Sci-Fi Sun...
1        Cowboy Bebop: Tengoku no Tobira Action, Sci-Fi...
2                Trigun Action, Adventure, Sci-Fi Madhouse
3        Witch Hunter Robin Action, Drama, Mystery, Sup...
4        Bouken Ou Beet Adventure, Fantasy, Supernatura...
                               ...                        
24900    Wu Nao Monu Comedy, Fantasy, Slice of Life UNK...
24901    Bu Xing Si: Yuan Qi Action, Adventure, Fantasy...
24902    Di Yi Xulie Action, Adventure, Fantasy, Sci-Fi...
24903             Bokura no Saishuu Sensou UNKNOWN UNKNOWN
24904                       Shijuuku Nichi UNKNOWN UNKNOWN
Name: tags, Length: 24905, dtype: object

In [7]:
# Fit the vectorizer on the tag strings and expand the result into a dense array
# (one row per anime). NOTE: the dense copy can be large for a big vocabulary.
vector = (
    cv
    .fit_transform(new['tags'])
    .toarray()
)

# Pairwise cosine similarity between every pair of rows of `vector`.
similarity = cosine_similarity(X=vector)

In [8]:
def recommend(anime, top_n=9):
    """Print the titles most similar to ``anime``.

    Looks ``anime`` up in the module-level ``new`` frame (first exact match on
    the 'Name' column), ranks every other row by its entry in the module-level
    ``similarity`` matrix and prints the best ``top_n`` of them. For each
    recommendation two lines are printed: the ``(row position, similarity)``
    pair and then the anime's name. The row position is the positional index
    into ``new``, not the ``anime_id`` column.

    Parameters
    ----------
    anime : str
        Exact value of the 'Name' column to look up.
    top_n : int, default 9
        Number of recommendations to print (expected to be non-negative).

    Raises
    ------
    IndexError
        If no row of ``new`` has ``Name == anime``.
    """
    # Resolve the row *position* (not the index label): both `similarity` and
    # `new.iloc` are addressed positionally below.
    hits = (new['Name'] == anime).to_numpy().nonzero()[0]
    if len(hits) == 0:
        raise IndexError(f"no anime named {anime!r} in the 'Name' column")
    position = int(hits[0])

    # Exclude the query row explicitly instead of assuming it sorts first: a
    # row with an identical tag vector and a lower position would otherwise
    # take the first slot, and the query itself would be printed as a
    # recommendation. sorted() is stable, so ties keep their row order.
    ranked = sorted(
        (
            (row, score)
            for row, score in enumerate(similarity[position])
            if row != position
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )
    for pair in ranked[:top_n]:
        print(pair)
        print(new.iloc[pair[0]]['Name'])
    


In [9]:
# Each printed pair is (row position in `new`, cosine similarity score);
# the matching anime's name follows on the next line.
recommend(anime='Death Note')
print('---')

(2722, 0.912870929175277)
Death Note: Rewrite
(7106, 0.7999999999999999)
Death Billiards
(9710, 0.7302967433402215)
Death Parade
(3858, 0.5477225575051662)
Mouryou no Hako
(2728, 0.5163977794943223)
Gyakkyou Burai Kaiji: Ultimate Survivor
(6124, 0.5163977794943223)
Gyakkyou Burai Kaiji: Hakairoku-hen
(21008, 0.5163977794943223)
Tesla Note
(299, 0.47434164902525683)
Mousou Dairinin
(5583, 0.47434164902525683)
Supernatural The Animation
---


In [10]:
# Each printed pair is (row position in `new`, cosine similarity score);
# the matching anime's name follows on the next line.
recommend(anime='Tokyo Ghoul')
print('---')

(9632, 1.0000000000000002)
Tokyo Ghoul √A
(10849, 1.0000000000000002)
Tokyo Ghoul: "Pinto"
(13588, 1.0000000000000002)
Tokyo Ghoul:re
(10513, 0.9258200997725515)
Tokyo Ghoul: "Jack"
(14405, 0.8660254037844387)
Tokyo Ghoul:re 2nd Season
(6038, 0.6123724356957946)
Naruto x UT
(2850, 0.5773502691896258)
Ayakashi
(10, 0.5477225575051662)
Naruto
(131, 0.5477225575051662)
Juuni Kokuki
---


In [11]:
# Each printed pair is (row position in `new`, cosine similarity score);
# the matching anime's name follows on the next line.
recommend(anime='Shingeki no Kyojin')
print('---')

(9352, 0.8249579113843054)
Shingeki no Kyojin Season 2
(7857, 0.7559289460184543)
Shingeki no Kyojin OVA
(13176, 0.7559289460184543)
Shingeki no Kyojin Season 3
(14865, 0.7559289460184543)
Shingeki no Kyojin Season 3 Part 2
(16827, 0.7559289460184543)
Shingeki no Kyojin: Chronicle
(8988, 0.7071067811865475)
Shingeki no Kyojin Movie 1: Guren no Yumiya
(8989, 0.7071067811865475)
Shingeki no Kyojin Movie 2: Jiyuu no Tsubasa
(13349, 0.7071067811865475)
Shingeki no Kyojin: Lost Girls
(9353, 0.6804138174397717)
Shingeki no Kyojin: Kuinaki Sentaku
---


In [12]:
# TODO: popularity factors still need to be applied to this model.
# Each printed pair is (row position in `new`, cosine similarity score);
# the matching anime's name follows on the next line.
recommend(anime='Kimi no Na wa.')
print('---')

(7500, 0.6708203932499369)
Kotonoha no Niwa
(15056, 0.6000000000000001)
Tenki no Ko
(2560, 0.5976143046671969)
Makasete Iruka!
(232, 0.5720775535473555)
Hoshi no Koe
(1535, 0.565685424949238)
Byousoku 5 Centimeter
(7007, 0.565685424949238)
Taisei Kensetsu CMs
(14143, 0.565685424949238)
Shikioriori
(406, 0.5262348115842176)
Kumo no Mukou, Yakusoku no Basho
(4908, 0.5163977794943223)
Ibarahime Mata wa Nemurihime
---


In [13]:
import pickle
# Persist the catalogue frame and the similarity matrix to pickle files.
# Each file is opened in a `with` block so the handle is flushed and closed
# as soon as the dump finishes, instead of being left open.
with open('anime_list.pkl', 'wb') as fh:
    pickle.dump(new, fh)

# NOTE(review): matrix.pkl receives the same `new` DataFrame as anime_list.pkl.
# This looks like a copy-paste slip (was `vector` intended?). It is kept
# unchanged so existing consumers of matrix.pkl are unaffected -- confirm.
with open('matrix.pkl', 'wb') as fh:
    pickle.dump(new, fh)

with open('similarity.pkl', 'wb') as fh:
    pickle.dump(similarity, fh)