# Anime Recommendation System Project

## Imports

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import sklearn as skl

## Datasets

#### Main Anime Dataset (anime.csv)

In [2]:
# Read the anime metadata table from disk into `anime_main`.
anime_main = pd.read_csv(filepath_or_buffer="data/anime.csv")

# Print (rows, columns), then preview the first five rows as the cell output.
print(anime_main.shape)
anime_main.head(n=5)

(17562, 35)


Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",Unknown,...,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Trigun,トライガン,TV,26,"Apr 1, 1998 to Sep 30, 1998",Spring 1998,...,50229.0,75651.0,86142.0,49432.0,15376.0,5838.0,1965.0,664.0,316.0,533.0
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),TV,26,"Jul 2, 2002 to Dec 24, 2002",Summer 2002,...,2182.0,4806.0,10128.0,11618.0,5709.0,2920.0,1083.0,353.0,164.0,131.0
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",Beet the Vandel Buster,冒険王ビィト,TV,52,"Sep 30, 2004 to Sep 29, 2005",Fall 2004,...,312.0,529.0,1242.0,1713.0,1068.0,634.0,265.0,83.0,50.0,27.0


#### Anime Ratings Dataset (rating_complete.csv)

In [3]:
# Read the per-user ratings table from disk into `anime_ratings`.
anime_ratings = pd.read_csv(filepath_or_buffer="data/rating_complete.csv")

# Print (rows, columns), then preview the first five rows as the cell output.
print(anime_ratings.shape)
anime_ratings.head(n=5)

(57633278, 3)


Unnamed: 0,user_id,anime_id,rating
0,0,430,9
1,0,1004,5
2,0,3010,7
3,0,570,7
4,0,2762,9


## Dataset Cleaning and Merging

In [4]:
# Clean main anime dataset and keep necessary features
# The column selection and the NaN filter are chained so that no frame is
# mutated in place: calling dropna(inplace=True) on a freshly sliced column
# subset is the pattern that makes pandas emit SettingWithCopyWarning.
# NOTE(review): the recorded output shows the row count unchanged after this
# step, and the raw preview contains the literal string 'Unknown' (Premiered
# column), so missing values may be encoded as strings that dropna() cannot
# see -- confirm for the columns kept here.
anime_main = anime_main[['MAL_ID', 'Name', 'Score', 'Genres', 'Type', 'Episodes']].dropna()

print(anime_main.shape)
anime_main.head()

(17562, 6)


Unnamed: 0,MAL_ID,Name,Score,Genres,Type,Episodes
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Movie,1
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",TV,26
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",TV,26
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",TV,52


In [5]:
# Reduce size of ratings dataset by sampling
anime_ratings = anime_ratings.sample(n=10000)
print(anime_ratings.shape)

(10000, 3)


In [6]:
# Give both frames a shared 'Anime ID' key and readable column names, then
# join them on that key (pandas' default inner join).
anime_main = anime_main.rename(columns={'MAL_ID': 'Anime ID'})
anime_ratings = anime_ratings.rename(
    columns={
        'user_id': 'User ID',
        'anime_id': 'Anime ID',
        'rating': 'Rating',
    }
)
anime_df = anime_main.merge(anime_ratings, on='Anime ID')

# Print (rows, columns), then preview the merged frame as the cell output.
print(anime_df.shape)
anime_df.head()

(10000, 8)


Unnamed: 0,Anime ID,Name,Score,Genres,Type,Episodes,User ID,Rating
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,36521,10
1,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,108528,8
2,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,236164,8
3,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,190057,10
4,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",TV,26,89530,8


# Old SVD Code

In [None]:
#Create Ratings Matrix
# Dense anime x user matrix of ratings; cells with no rating stay 0.
# Fixes over the previous version of this cell:
#   * it was not valid Python (unbalanced parentheses, assignment placed
#     inside the constructor call);
#   * np.ndarray(...) leaves memory uninitialised, so unrated cells held
#     arbitrary values -- np.zeros gives them a defined value;
#   * user ids were shifted by -1, which sends user 0 to column -1 (the last
#     column) where it overwrites another user's ratings. The ratings preview
#     earlier in this notebook shows user_id starting at 0, so user ids are
#     used as column indices directly and the matrix gets one extra column.
# Anime ids keep the `- 1` shift (row = anime_id - 1).
# NOTE(review): `animeRatings` is not defined in the visible cells (the loaded
# frame is called `anime_ratings`) -- confirm where it is meant to come from.
# NOTE(review): the matrix is dense; its size is max(anime_id) x
# (max(user_id) + 1) bytes, which may not fit in memory for the full dataset.
anime_idx = animeRatings.anime_id.values - 1
user_idx = animeRatings.user_id.values
ratingsMat = np.zeros(shape=(anime_idx.max() + 1, user_idx.max() + 1), dtype=np.uint8)
ratingsMat[anime_idx, user_idx] = animeRatings.rating.values

In [None]:
#Normalize animeRating matrix
# Subtract each row's mean from that row. keepdims=True keeps the means as a
# column vector so the subtraction broadcasts across the columns.
# NOTE(review): the mean is taken over every cell of the row, including any
# cells that only hold a placeholder for "not rated" -- confirm this is the
# intended centring.
normalized_Ratings = ratingsMat - np.mean(ratingsMat, axis=1, keepdims=True)
#Compute the SVD
# NOTE(review): the scale uses ratingsMat.shape[0]; the rows of A (the samples)
# number ratingsMat.shape[1]. This only rescales S, not the singular vectors --
# confirm which count is intended.
A = normalized_Ratings.T / np.sqrt(ratingsMat.shape[0] - 1)
# full_matrices=False computes the reduced SVD. The default (True) also builds
# a square U with one row and column per row of A, which is never used in the
# visible cells and dominates memory/time when A has many rows.
# np.linalg.svd returns the right singular vectors already transposed, so `V`
# holds them as rows; downstream code uses V.T to get them as columns.
U, S, V = np.linalg.svd(A, full_matrices=False)

#Define a function to calculate cosine similarity
def topSimiliar(data, anime_id, top_n=10):
    """Return the indices of the rows of ``data`` most cosine-similar to one row.

    Parameters
    ----------
    data : 2-D numpy array
        One vector per row.
    anime_id : int
        1-based identifier of the query row; row ``anime_id - 1`` is used.
    top_n : int, default 10
        Maximum number of row indices to return.

    Returns
    -------
    numpy.ndarray
        0-based row indices ordered from most to least similar. The query row
        itself is part of the ranking. Rows whose similarity is undefined
        (all-zero rows, or every row when the query row is all zeros) are
        ranked last.

    Raises
    ------
    IndexError
        If ``anime_id - 1`` is not a valid row index. Values below 1 are
        rejected explicitly instead of wrapping around to the end of ``data``.
    """
    index = anime_id - 1
    if not 0 <= index < data.shape[0]:
        raise IndexError(
            'anime_id {0} is out of range for data with {1} rows'.format(anime_id, data.shape[0])
        )
    anime_row = data[index, :]
    # Row-wise Euclidean norms: sum of squares along each row, then sqrt.
    magnitude = np.sqrt(np.einsum('ij, ij -> i', data, data))
    dots = np.dot(anime_row, data.T)
    denominator = magnitude[index] * magnitude
    # Divide only where the denominator is non-zero; this avoids 0/0 -> NaN
    # and the accompanying RuntimeWarning. Undefined entries get -inf so they
    # sort to the end of the ranking.
    comparable = denominator > 0
    similarity = np.full(data.shape[0], -np.inf)
    similarity[comparable] = dots[comparable] / denominator[comparable]
    # A stable sort keeps tied rows in ascending index order.
    sort_Index = np.argsort(-similarity, kind='stable')
    return sort_Index[:top_n]

In [None]:
def printSimiliarAnime(animeList, animeID, numIndexes):
    """Print the title of one anime followed by the titles of its recommendations.

    Parameters
    ----------
    animeList : pandas.DataFrame
        Must provide ``MAL_ID`` and ``Name`` columns.
    animeID : int
        ``MAL_ID`` of the anime the recommendations are for.
    numIndexes : array-like of int
        0-based row indices of the recommended anime; each is converted to an
        id by adding 1 before it is looked up in ``MAL_ID``.

    Raises
    ------
    IndexError
        If ``animeID`` does not occur in ``animeList.MAL_ID``.
    """
    query = animeList[animeList.MAL_ID == animeID]
    if query.empty:
        raise IndexError('animeID {0} not found in animeList.MAL_ID'.format(animeID))
    print('Recommendations for {0}:\n'.format(query.Name.values[0]))
    # np.asarray lets plain lists be shifted by +1 as well as numpy arrays.
    for iD in np.asarray(numIndexes) + 1:
        # Look up the recommended id (the loop variable), not the query id.
        match = animeList[animeList.MAL_ID == iD]
        if match.empty:
            # No anime carries this id; skip it instead of failing on [0].
            continue
        print(match.Name.values[0])

In [None]:
#Test
# Settings: number of leading components kept, the query anime id, and how
# many recommendations to request. Note that `numIndexes` is a count here,
# while printSimiliarAnime's parameter of the same name receives the indices.
k = 50
animeID = 10
numIndexes = 10

# `V` comes from the np.linalg.svd cell; V.T[:, :k] keeps the first k columns
# of its transpose, giving one k-dimensional row per original column of A.
sliced = V.T[:, :k]
indexes = topSimiliar(sliced, animeID, top_n=numIndexes)

# NOTE(review): `animeList` is not defined in the visible cells -- confirm
# which frame (with MAL_ID and Name columns) is meant here.
printSimiliarAnime(animeList=animeList, animeID=animeID, numIndexes=indexes)