In [1]:
#import libs
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the anime dataset from "anime.csv" (path is relative to the working directory)
anime_df = pd.read_csv("anime.csv")

In [3]:
# Inspect the first rows of the dataset (head() shows 5 rows by default)
anime_df.head()

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",Unknown,...,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Trigun,トライガン,TV,26,"Apr 1, 1998 to Sep 30, 1998",Spring 1998,...,50229.0,75651.0,86142.0,49432.0,15376.0,5838.0,1965.0,664.0,316.0,533.0
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),TV,26,"Jul 2, 2002 to Dec 24, 2002",Summer 2002,...,2182.0,4806.0,10128.0,11618.0,5709.0,2920.0,1083.0,353.0,164.0,131.0
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",Beet the Vandel Buster,冒険王ビィト,TV,52,"Sep 30, 2004 to Sep 29, 2005",Fall 2004,...,312.0,529.0,1242.0,1713.0,1068.0,634.0,265.0,83.0,50.0,27.0


In [4]:
# Inspect the last rows of the dataset (tail() shows 5 rows by default)
anime_df.tail()

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
17557,48481,Daomu Biji Zhi Qinling Shen Shu,Unknown,"Adventure, Mystery, Supernatural",Unknown,盗墓笔记之秦岭神树,ONA,Unknown,"Apr 4, 2021 to ?",Unknown,...,Unknown,Unknown,Unknown,1.0,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown
17558,48483,Mieruko-chan,Unknown,"Comedy, Horror, Supernatural",Unknown,見える子ちゃん,TV,Unknown,2021 to ?,Unknown,...,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown
17559,48488,Higurashi no Naku Koro ni Sotsu,Unknown,"Mystery, Dementia, Horror, Psychological, Supe...",Higurashi:When They Cry – SOTSU,ひぐらしのなく頃に卒,TV,Unknown,"Jul, 2021 to ?",Summer 2021,...,1.0,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown
17560,48491,Yama no Susume: Next Summit,Unknown,"Adventure, Slice of Life, Comedy",Unknown,ヤマノススメ Next Summit,TV,Unknown,Unknown,Unknown,...,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown
17561,48492,Scarlet Nexus,Unknown,"Action, Fantasy",Unknown,SCARLET NEXUS,TV,Unknown,"Jul, 2021 to ?",Summer 2021,...,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown


In [5]:
# Replace the 'Unknown' placeholder in the Score-10 ... Score-1 columns with 0
# and store those columns as real floats. Leaving them as strings (object
# dtype) would make every later numeric step depend on implicit coercion.
score_column = [f'Score-{level}' for level in range(10, 0, -1)]

# The placeholder is swapped for the string '0.0' first so that every cell is
# parseable, then the whole selection is cast to float in one step. Any other
# non-numeric value will raise here instead of being silently carried along.
anime_df[score_column] = anime_df[score_column].replace('Unknown', '0.0').astype(float)

anime_df.tail(3)

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
17559,48488,Higurashi no Naku Koro ni Sotsu,Unknown,"Mystery, Dementia, Horror, Psychological, Supe...",Higurashi:When They Cry – SOTSU,ひぐらしのなく頃に卒,TV,Unknown,"Jul, 2021 to ?",Summer 2021,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17560,48491,Yama no Susume: Next Summit,Unknown,"Adventure, Slice of Life, Comedy",Unknown,ヤマノススメ Next Summit,TV,Unknown,Unknown,Unknown,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17561,48492,Scarlet Nexus,Unknown,"Action, Fantasy",Unknown,SCARLET NEXUS,TV,Unknown,"Jul, 2021 to ?",Summer 2021,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Keep only the title plus the ten Score-N columns (no rescaling happens here)
new_df = anime_df[['Name', *score_column]]
new_df.head()

Unnamed: 0,Name,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,Cowboy Bebop,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,Cowboy Bebop: Tengoku no Tobira,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0
2,Trigun,50229.0,75651.0,86142.0,49432.0,15376.0,5838.0,1965.0,664.0,316.0,533.0
3,Witch Hunter Robin,2182.0,4806.0,10128.0,11618.0,5709.0,2920.0,1083.0,353.0,164.0,131.0
4,Bouken Ou Beet,312.0,529.0,1242.0,1713.0,1068.0,634.0,265.0,83.0,50.0,27.0


In [7]:
# Move the 'Name' column into the index so that only the Score-N columns remain as data
score_matrix = new_df.set_index('Name')

In [8]:
# Pairwise cosine similarity between every pair of rows in score_matrix.
# With a single argument the rows are compared against themselves, giving a
# square (n_rows x n_rows) matrix.
# NOTE(review): the result is dense, so memory grows quadratically with the row count.
cos_matrix = cosine_similarity(score_matrix)

In [9]:
# Five highest-similarity (row position, similarity) pairs for row 0.
# The first entry of the sorted list is skipped, on the assumption that it is row 0 itself.
sorted(enumerate(cos_matrix[0]), key=lambda pair: pair[1], reverse=True)[1:6]

[(8883, 0.9998863368733955),
 (9, 0.999606792838517),
 (176, 0.9994787932163087),
 (25, 0.998991997157842),
 (16167, 0.9987249937193928)]

In [10]:
# Recommendation function: rank all rows by similarity to the requested title (highest first)
def recommend(anime_rat, top_n=5, names_df=None, similarity=None):
    """Print the names of the anime most similar to ``anime_rat``.

    Parameters
    ----------
    anime_rat : str
        Exact value of the ``Name`` column to look up. If several rows share
        the name, the first one is used.
    top_n : int, default 5
        Maximum number of recommendations to print.
    names_df : pandas.DataFrame, optional
        Frame with a ``Name`` column whose row order matches the rows of
        ``similarity``. Defaults to the notebook-level ``new_df``.
    similarity : 2-D array, optional
        Square matrix of pairwise similarities, indexed by row position.
        Defaults to the notebook-level ``cos_matrix``.

    Raises
    ------
    IndexError
        If no row has ``Name == anime_rat``.
    """
    if names_df is None:
        names_df = new_df
    if similarity is None:
        similarity = cos_matrix

    # Look up the row *position*, not the index label: the similarity matrix
    # is addressed positionally, and labels only coincide with positions when
    # the frame has a default RangeIndex.
    positions = names_df['Name'].eq(anime_rat).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"anime {anime_rat!r} not found in the 'Name' column")
    anime_index = int(positions[0])

    scores = similarity[anime_index]
    ranked = sorted(enumerate(scores), reverse=True, key=lambda x: x[1])

    # Drop the query row explicitly instead of assuming it sorts first; with
    # tied scores (e.g. identical or all-zero rows) it can land anywhere
    # among the ties and would otherwise be recommended to itself.
    anime_list = [pair for pair in ranked if pair[0] != anime_index][:max(top_n, 0)]

    for i in anime_list:
        print(names_df.iloc[i[0]]['Name'])

In [11]:
# Test 1: print the recommendations for 'Naruto'
recommend('Naruto')

Hokuto no Ken
Dragon Ball
Detective Conan OVA 09: The Stranger in 10 Years...
Code Geass: Hangyaku no Lelouch R2 Special Edition - Zero Requiem
Saiyuuki Gaiden


In [12]:
# Test 2: print the recommendations for 'Blood+'
recommend('Blood+')

Chrno Crusade
Kumo no Mukou, Yakusoku no Basho
Furiko
Shoujo☆Kageki Revue Starlight
Jigoku Shoujo


In [13]:
# Test 3: print the recommendations for 'Cowboy Bebop' (row 0 of the dataset)
recommend('Cowboy Bebop')

Shigatsu wa Kimi no Uso
Monster
Sen to Chihiro no Kamikakushi
Rurouni Kenshin: Meiji Kenkaku Romantan - Tsuioku-hen
Kimetsu no Yaiba Movie: Mugen Ressha-hen


In [14]:
# Test 4 ("Panda test"): print the recommendations for 'Fullmetal Alchemist: Brotherhood'
recommend('Fullmetal Alchemist: Brotherhood')

Steins;Gate
Gintama'
Gintama': Enchousen
Clannad: After Story
Gintama°
