### Simple Recommender System for new anime file

In [1]:
#importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
#loading the dataset
# NOTE(review): the path below is absolute and tied to one user's Windows
# profile, so this cell only runs on a machine with that exact folder layout.
# Consider pointing at the CSV relative to the repository instead.
anime_data = pd.read_csv(r'C:\Users\JADESOLA\Documents\AI\MadeWithML\Recommendation system\Anime-Recommender-System\DATA\genre_data.csv')

In [3]:
#viewing the data
anime_data.head(3)

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg,norm_weighted_avg,norm_members,score
0,Death Note,"['Mystery', 'Police', 'Psychological', 'Supern...",8.67,1006242,1451708,8.62831,0.879009,1.0,0.939504
1,Fullmetal Alchemist Brotherhood,"['Action', 'Military', 'Adventure', 'Comedy', ...",9.25,730784,1194518,9.174036,1.0,0.822714,0.911357
2,Shingeki no Kyojin,"['Action', 'Military', 'Mystery', 'SuperPower'...",8.49,936784,1340641,8.44995,0.839465,0.923439,0.881452


## The features of the dataset are as follows: 
#### -Anime_id : anime Id (as per myanimelist.net)
#### -Title : name of anime
#### -Genre : Main genre
#### -Synopsis : Brief Description
#### -Rating : Rating of anime as per myanimelist.net/
#### -ScoredBy : Total number of users who scored the given anime
#### -Popularity : Rank of anime based on popularity
#### -Members : Number of members added given anime on their list
#### -Episodes : Number of episodes

In [4]:
anime_data.shape

(6368, 9)

### Lower-casing the column names

In [5]:
# Normalise every column label to lower case, then preview the frame.
anime_data.columns = anime_data.columns.str.lower()
anime_data.head()

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg,norm_weighted_avg,norm_members,score
0,Death Note,"['Mystery', 'Police', 'Psychological', 'Supern...",8.67,1006242,1451708,8.62831,0.879009,1.0,0.939504
1,Fullmetal Alchemist Brotherhood,"['Action', 'Military', 'Adventure', 'Comedy', ...",9.25,730784,1194518,9.174036,1.0,0.822714,0.911357
2,Shingeki no Kyojin,"['Action', 'Military', 'Mystery', 'SuperPower'...",8.49,936784,1340641,8.44995,0.839465,0.923439,0.881452
3,Steins;Gate,"['Thriller', 'SciFi']",9.14,561405,1005781,9.046752,0.97178,0.692614,0.832197
4,One Punch Man,"['Action', 'SciFi', 'Comedy', 'Parody', 'Super...",8.73,687965,1015163,8.667603,0.88772,0.699081,0.793401


In [6]:
#checking the datatype of each column
anime_data.dtypes

title                 object
genres                object
rating               float64
scored_by              int64
members                int64
weighted_avg         float64
norm_weighted_avg    float64
norm_members         float64
score                float64
dtype: object

## Some columns, such as rating, scoredby, popularity, members and episodes, need to be typecast as integers. We'll convert them once all the data has been cleaned.

In [7]:
#creating the function to remove the characters
import re
def clean_up(s):
    """Return ``str(s)`` with every disallowed character removed.

    Characters that survive are whitespace, ASCII letters and digits, and
    the punctuation marks ``. , ; ! -``; everything else is dropped.
    """
    disallowed = re.compile(r"[^\sa-zA-Z0-9\.\,\;\!\-]")
    text = str(s)
    return disallowed.sub("", text)

In [8]:
# Strip unwanted characters from both text columns, title first and then genres.
for text_column in ('title', 'genres'):
    anime_data[text_column] = anime_data[text_column].apply(clean_up)

In [9]:
#viewing the cleaned data
anime_data.head()

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg,norm_weighted_avg,norm_members,score
0,Death Note,"Mystery, Police, Psychological, Supernatural, ...",8.67,1006242,1451708,8.62831,0.879009,1.0,0.939504
1,Fullmetal Alchemist Brotherhood,"Action, Military, Adventure, Comedy, Drama, Ma...",9.25,730784,1194518,9.174036,1.0,0.822714,0.911357
2,Shingeki no Kyojin,"Action, Military, Mystery, SuperPower, Drama, ...",8.49,936784,1340641,8.44995,0.839465,0.923439,0.881452
3,Steins;Gate,"Thriller, SciFi",9.14,561405,1005781,9.046752,0.97178,0.692614,0.832197
4,One Punch Man,"Action, SciFi, Comedy, Parody, SuperPower, Sup...",8.73,687965,1015163,8.667603,0.88772,0.699081,0.793401


In [10]:
#checking for null values
anime_data.isnull().sum()

title                0
genres               0
rating               0
scored_by            0
members              0
weighted_avg         0
norm_weighted_avg    0
norm_members         0
score                0
dtype: int64

In [11]:
anime_data.shape

(6368, 9)

In [12]:
#checking out the number of unique animes
anime_data['title'].nunique()

6346

## Getting the best anime based on genres

In [13]:
anime_data.head()

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg,norm_weighted_avg,norm_members,score
0,Death Note,"Mystery, Police, Psychological, Supernatural, ...",8.67,1006242,1451708,8.62831,0.879009,1.0,0.939504
1,Fullmetal Alchemist Brotherhood,"Action, Military, Adventure, Comedy, Drama, Ma...",9.25,730784,1194518,9.174036,1.0,0.822714,0.911357
2,Shingeki no Kyojin,"Action, Military, Mystery, SuperPower, Drama, ...",8.49,936784,1340641,8.44995,0.839465,0.923439,0.881452
3,Steins;Gate,"Thriller, SciFi",9.14,561405,1005781,9.046752,0.97178,0.692614,0.832197
4,One Punch Man,"Action, SciFi, Comedy, Parody, SuperPower, Sup...",8.73,687965,1015163,8.667603,0.88772,0.699081,0.793401


In [14]:
#to remove the space within elements in the list
def remove_space(x):
    """Return ``str(x)`` with every space character (``" "``) deleted.

    Only the plain space is removed; tabs, newlines and other whitespace
    are left in place.
    """
    text = str(x)
    return text.replace(" ", "")

In [15]:
# Drop the spaces inside each genre string so that a later split on ','
# yields clean genre names.
anime_data['genres'] = anime_data['genres'].map(remove_space)

In [16]:
#splitting the genre column into lists
def split_columns(x):
    """Split the comma-separated string ``x`` into a list of its pieces."""
    pieces = x.split(',')
    return pieces

In [17]:
# Turn each genre string into a list of genre names.
# Running this cell a second time fails: the column then holds lists,
# and split_columns calls .split(), which lists do not have.
anime_data['genres'] = anime_data['genres'].map(split_columns)

In [18]:
anime_data.head()

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg,norm_weighted_avg,norm_members,score
0,Death Note,"[Mystery, Police, Psychological, Supernatural,...",8.67,1006242,1451708,8.62831,0.879009,1.0,0.939504
1,Fullmetal Alchemist Brotherhood,"[Action, Military, Adventure, Comedy, Drama, M...",9.25,730784,1194518,9.174036,1.0,0.822714,0.911357
2,Shingeki no Kyojin,"[Action, Military, Mystery, SuperPower, Drama,...",8.49,936784,1340641,8.44995,0.839465,0.923439,0.881452
3,Steins;Gate,"[Thriller, SciFi]",9.14,561405,1005781,9.046752,0.97178,0.692614,0.832197
4,One Punch Man,"[Action, SciFi, Comedy, Parody, SuperPower, Su...",8.73,687965,1015163,8.667603,0.88772,0.699081,0.793401


In [19]:
anime_data.dtypes

title                 object
genres                object
rating               float64
scored_by              int64
members                int64
weighted_avg         float64
norm_weighted_avg    float64
norm_members         float64
score                float64
dtype: object

In [37]:
#selecting the first two genre for every anime
def first_two(x):
    """Return the leading two items of ``x`` (fewer if ``x`` is shorter)."""
    leading_pair = x[0:2]
    return leading_pair

In [38]:
# Keep only the first two genres listed for each anime.
anime_data["genres"] = anime_data["genres"].map(first_two)

In [39]:
#mapping each row position (0, 1, 2, ...) to that row's list of genres
anime_map = dict(enumerate(anime_data['genres']))

In [40]:
anime_map

{0: ['Mystery', 'Police'],
 1: ['Action', 'Military'],
 2: ['Action', 'Military'],
 3: ['Thriller', 'SciFi'],
 4: ['Action', 'SciFi'],
 5: ['Action', 'Military'],
 6: ['Action', 'Adventure'],
 7: ['Action', 'Comedy'],
 8: ['Game', 'Adventure'],
 9: ['Supernatural', 'Drama'],
 10: ['Action', 'Military'],
 11: ['Action', 'Adventure'],
 12: ['SliceofLife', 'Comedy'],
 13: ['Action', 'Mystery'],
 14: ['Action', 'Adventure'],
 15: ['Adventure', 'Supernatural'],
 16: ['Drama', 'Music'],
 17: ['Action', 'Adventure'],
 18: ['Action', 'Adventure'],
 19: ['Action', 'Psychological'],
 20: ['SliceofLife', 'Comedy'],
 21: ['Action', 'Comedy'],
 22: ['Action', 'Adventure'],
 23: ['Action', 'Adventure'],
 24: ['Action', 'Adventure'],
 25: ['Mystery', 'Psychological'],
 26: ['Psychological', 'Drama'],
 27: ['Action', 'Adventure'],
 28: ['Action', 'Adventure'],
 29: ['Action', 'SciFi'],
 30: ['Action', 'Mystery'],
 31: ['SliceofLife', 'Supernatural'],
 32: ['Action', 'Comedy'],
 33: ['Action', 'Demons'

In [41]:
#function to get recommendation based on genre
def get_recommendations(genre, top_n=10):
    """Return the titles of the first ``top_n`` anime tagged with ``genre``.

    Rows are scanned in the order they appear in ``anime_data`` and a row
    matches when ``genre`` is one of the entries stored for it in
    ``anime_map`` (row position -> list of genres). The notebook treats the
    earliest matches as the "top" ones, which presumably relies on the
    dataset already being sorted by score -- TODO confirm.

    Parameters
    ----------
    genre : str
        Genre name exactly as stored in ``anime_map`` (e.g. ``'Mystery'``,
        ``'SliceofLife'``).
    top_n : int, default 10
        Maximum number of titles to return.

    Returns
    -------
    pandas.Series
        Titles of the matching rows, at most ``top_n`` of them; empty when
        no row carries ``genre``.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Collect row positions of matching anime, stopping as soon as enough
    # have been found instead of walking the whole frame.
    anime_index = []
    for row in range(anime_data.shape[0]):
        if len(anime_index) >= top_n:
            break
        if genre in anime_map[row]:
            anime_index.append(row)

    # Returning the titles of the selected rows.
    return anime_data['title'].iloc[anime_index]

In [51]:
get_recommendations('Mystery')

0                        Death Note
13                      Tokyo Ghoul
25          Boku dake ga Inai Machi
30                         Durarara
47                     Death Parade
62                          Baccano
68                Zankyou no Terror
79                   Shinsekai yori
80                    Tokyo Ghoul A
84    Suzumiya Haruhi no Shoushitsu
Name: title, dtype: object