In [1]:
#importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
# Read the cleaned anime dataset (relative path) into a DataFrame.
anime_data = pd.read_csv(filepath_or_buffer='../DATA/Anime_new_cleaned.csv')

In [3]:
# Lower-case every column label so later cells can refer to columns with one consistent spelling.
anime_data.columns = [str.lower(label) for label in anime_data.columns]
anime_data.head(3)

Unnamed: 0,anime_id,title,genres,synopsis,anime_type,producer,studio,rating,scored_by,popularity,members,num_of_episodes,anime_source,aired,link
0,1,Cowboy Bebop,"['Action','Adventure','Comedy','Drama','SciFi'...","In the year 2071, humanity has colonized sever...",TV,['BandaiVisual'],Sunrise,8.81,363889.0,39.0,704490.0,26.0,Original,"Apr 3, 1998 to Apr 24, 1999",https://myanimelist.net/anime/1/Cowboy_Bebop
1,5,Cowboy Bebop Tengoku no Tobira,"['Action','Space','Drama','Mystery','SciFi']","Another day, another bountysuch is the life of...",Movie,"['Sunrise','BandaiVisual']",Bones,8.41,111187.0,475.0,179899.0,1.0,Original,"Sep 1, 2001",https://myanimelist.net/anime/5/Cowboy_Bebop__...
2,6,Trigun,"['Action','SciFi','Adventure','Comedy','Drama'...","Vash the Stampede is the man with a 60,000,000...",TV,['VictorEntertainment'],Madhouse,8.31,197451.0,158.0,372709.0,26.0,Manga,"Apr 1, 1998 to Sep 30, 1998",https://myanimelist.net/anime/6/Trigun


In [4]:
#creating the function to remove the characters
import re
# Matches any character that is NOT whitespace, an ASCII letter or digit,
# or one of the punctuation marks . , ; ! -
_DISALLOWED_CHARS = re.compile(r"[^\sa-zA-Z0-9\.\,\;\!\-]")


def clean_up(s):
    """Remove unwanted characters from a value's text form.

    Everything except whitespace, ASCII letters, digits and the punctuation
    marks ``. , ; ! -`` is deleted (so brackets and quotes around list-like
    strings disappear, as do non-ASCII characters).

    Parameters
    ----------
    s : object
        Value to clean. Non-string values are converted with ``str`` first.

    Returns
    -------
    str or the original missing value
        The cleaned text. ``None`` and float NaN are returned unchanged so a
        missing value stays missing instead of becoming the literal text
        ``"None"`` / ``"nan"``, which a later ``dropna`` would not catch.
    """
    # `s != s` is only true for NaN; the isinstance guard keeps the check to floats.
    if s is None or (isinstance(s, float) and s != s):
        return s
    return _DISALLOWED_CHARS.sub("", str(s))

In [5]:
# Run the character clean-up over each free-text column in turn.
for text_column in ('title', 'genres', 'producer', 'studio'):
    anime_data[text_column] = anime_data[text_column].apply(clean_up)

In [6]:
# Keep only the columns the weighted-score recommendation needs.
anime_data = anime_data.loc[:, ['title', 'genres', 'rating', 'scored_by', 'members']]
anime_data.head(2)

Unnamed: 0,title,genres,rating,scored_by,members
0,Cowboy Bebop,"Action,Adventure,Comedy,Drama,SciFi,Space",8.81,363889.0,704490.0
1,Cowboy Bebop Tengoku no Tobira,"Action,Space,Drama,Mystery,SciFi",8.41,111187.0,179899.0


In [7]:
# Keep only anime that have at least 1000 members.
anime_data = anime_data[anime_data['members'].ge(1000)]
anime_data

Unnamed: 0,title,genres,rating,scored_by,members
0,Cowboy Bebop,"Action,Adventure,Comedy,Drama,SciFi,Space",8.81,363889.0,704490.0
1,Cowboy Bebop Tengoku no Tobira,"Action,Space,Drama,Mystery,SciFi",8.41,111187.0,179899.0
2,Trigun,"Action,SciFi,Adventure,Comedy,Drama,Shounen",8.31,197451.0,372709.0
3,Witch Hunter Robin,"Action,Magic,Police,Supernatural,Drama,Mystery",7.34,31875.0,74889.0
4,Bouken Ou Beet,"Adventure,Fantasy,Shounen,Supernatural",7.04,4757.0,11247.0
...,...,...,...,...,...
15811,Mega Man,,6.52,,3263.0
15812,AKo The Versus,,6.49,,2457.0
15813,Magic Knight Rayearth Omake,,6.45,,1730.0
15814,Rilu Rilu Fairilu Yousei no Door,,6.36,,1293.0


In [8]:
# Drop every row that has a missing value in any of the retained columns.
anime_data = anime_data.dropna(axis='index', how='any')
anime_data.head()

Unnamed: 0,title,genres,rating,scored_by,members
0,Cowboy Bebop,"Action,Adventure,Comedy,Drama,SciFi,Space",8.81,363889.0,704490.0
1,Cowboy Bebop Tengoku no Tobira,"Action,Space,Drama,Mystery,SciFi",8.41,111187.0,179899.0
2,Trigun,"Action,SciFi,Adventure,Comedy,Drama,Shounen",8.31,197451.0,372709.0
3,Witch Hunter Robin,"Action,Magic,Police,Supernatural,Drama,Mystery",7.34,31875.0,74889.0
4,Bouken Ou Beet,"Adventure,Fantasy,Shounen,Supernatural",7.04,4757.0,11247.0


In [9]:
# Store the vote and member counts as 64-bit integers instead of floats.
anime_data = anime_data.astype(dict.fromkeys(['scored_by', 'members'], 'int64'))
anime_data.head()

Unnamed: 0,title,genres,rating,scored_by,members
0,Cowboy Bebop,"Action,Adventure,Comedy,Drama,SciFi,Space",8.81,363889,704490
1,Cowboy Bebop Tengoku no Tobira,"Action,Space,Drama,Mystery,SciFi",8.41,111187,179899
2,Trigun,"Action,SciFi,Adventure,Comedy,Drama,Shounen",8.31,197451,372709
3,Witch Hunter Robin,"Action,Magic,Police,Supernatural,Drama,Mystery",7.34,31875,74889
4,Bouken Ou Beet,"Adventure,Fantasy,Shounen,Supernatural",7.04,4757,11247


In [10]:
# Remove exact duplicate rows, then renumber the rows 0..n-1.
# drop=True discards the old index directly instead of materialising it as an
# 'index' column that then has to be dropped by name.
anime_data = anime_data.drop_duplicates().reset_index(drop=True)
anime_data.head()

Unnamed: 0,title,genres,rating,scored_by,members
0,Cowboy Bebop,"Action,Adventure,Comedy,Drama,SciFi,Space",8.81,363889,704490
1,Cowboy Bebop Tengoku no Tobira,"Action,Space,Drama,Mystery,SciFi",8.41,111187,179899
2,Trigun,"Action,SciFi,Adventure,Comedy,Drama,Shounen",8.31,197451,372709
3,Witch Hunter Robin,"Action,Magic,Police,Supernatural,Drama,Mystery",7.34,31875,74889
4,Bouken Ou Beet,"Adventure,Fantasy,Shounen,Supernatural",7.04,4757,11247


In [11]:
# Weighted rating per anime, using the formula the original author attributes to IMDb:
#     weighted_avg = (R*V + C*m) / (V + m)
# Titles with few votes are pulled towards the overall mean C; titles with many
# votes stay close to their own rating R.
m = anime_data['scored_by'].quantile(0.80)   # vote threshold: 80th percentile of the vote counts
C = anime_data['rating'].mean()              # mean rating across all remaining anime
R = anime_data['rating']                     # average rating of each anime
V = anime_data['scored_by']                  # number of votes for each anime

anime_data['weighted_avg'] = (R * V + C * m) / (V + m)

In [12]:
# Rank anime from highest to lowest weighted average and preview the first five.
wavg = anime_data.sort_values(by='weighted_avg', ascending=False)
wavg.head(5)

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg
2688,Fullmetal Alchemist Brotherhood,"Action,Military,Adventure,Comedy,Drama,Magic,F...",9.25,730784,1194518,9.174036
5396,Kimi no Na wa.,"Supernatural,Drama,Romance,School",9.19,467437,724490,9.076429
3313,Steins;Gate,"Thriller,SciFi",9.14,561405,1005781,9.046752
3658,Hunter x Hunter 2011,"Action,Adventure,SuperPower,Shounen",9.11,400879,716409,8.983343
2506,Clannad After Story,"SliceofLife,Comedy,Supernatural,Drama,Romance",9.01,362434,605864,8.877188


In [13]:
# Rank anime by popularity (member count, largest first) and preview the first five.
anime_members = anime_data.sort_values(by='members', ascending=False)
anime_members.head()

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg
1177,Death Note,"Mystery,Police,Psychological,Supernatural,Thri...",8.67,1006242,1451708,8.62831
4043,Shingeki no Kyojin,"Action,Military,Mystery,SuperPower,Drama,Fanta...",8.49,936784,1340641,8.44995
3710,Sword Art Online,"Action,Adventure,Fantasy,Game,Romance",7.64,913806,1297309,7.621475
2688,Fullmetal Alchemist Brotherhood,"Action,Military,Adventure,Comedy,Drama,Magic,F...",9.25,730784,1194518,9.174036
5016,One Punch Man,"Action,SciFi,Comedy,Parody,SuperPower,Supernat...",8.73,687965,1015163,8.667603


############################

In [14]:
# Recommending anime based on weighted average and popularity.
# Both columns are min-max scaled so they can be blended into a single score.

from sklearn import preprocessing

min_max_scaler = preprocessing.MinMaxScaler()
data_scaled = min_max_scaler.fit_transform(anime_data[['weighted_avg','members']])
# Carry anime_data's own index over to the scaled frame. The scaled values are
# later assigned back into anime_data, and DataFrame assignment aligns on index
# labels; a fresh 0..n-1 index would only line up if anime_data had just been
# re-indexed.
data_norm = pd.DataFrame(
    data_scaled,
    columns=['weighted_avg','members'],
    index=anime_data.index,
)
data_norm.head()

Unnamed: 0,weighted_avg,members
0,0.89279,0.484928
1,0.771082,0.123318
2,0.774489,0.256225
3,0.55456,0.050932
4,0.508319,0.007063


In [15]:
# Attach the normalised columns, then build a score that weights the scaled
# weighted average and the scaled popularity equally (50% each).
anime_data[['norm_weighted_avg', 'norm_members']] = data_norm
anime_data['score'] = 0.5 * anime_data['norm_weighted_avg'] + 0.5 * anime_data['norm_members']
data_scored = anime_data.sort_values(by='score', ascending=False)
data_scored.head()

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg,norm_weighted_avg,norm_members,score
1177,Death Note,"Mystery,Police,Psychological,Supernatural,Thri...",8.67,1006242,1451708,8.62831,0.879009,1.0,0.939504
2688,Fullmetal Alchemist Brotherhood,"Action,Military,Adventure,Comedy,Drama,Magic,F...",9.25,730784,1194518,9.174036,1.0,0.822714,0.911357
4043,Shingeki no Kyojin,"Action,Military,Mystery,SuperPower,Drama,Fanta...",8.49,936784,1340641,8.44995,0.839465,0.923439,0.881452
3313,Steins;Gate,"Thriller,SciFi",9.14,561405,1005781,9.046752,0.97178,0.692614,0.832197
5016,One Punch Man,"Action,SciFi,Comedy,Parody,SuperPower,Supernat...",8.73,687965,1015163,8.667603,0.88772,0.699081,0.793401


In [16]:
# Preview the two normalised features next to the combined score for the top rows.
data_scored.head()[['title', 'norm_weighted_avg', 'norm_members', 'score']]

Unnamed: 0,title,norm_weighted_avg,norm_members,score
1177,Death Note,0.879009,1.0,0.939504
2688,Fullmetal Alchemist Brotherhood,1.0,0.822714,0.911357
4043,Shingeki no Kyojin,0.839465,0.923439,0.881452
3313,Steins;Gate,0.97178,0.692614,0.832197
5016,One Punch Man,0.88772,0.699081,0.793401


In [17]:
# Preview the five best-scoring rows with all columns.
data_scored.head(n=5)

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg,norm_weighted_avg,norm_members,score
1177,Death Note,"Mystery,Police,Psychological,Supernatural,Thri...",8.67,1006242,1451708,8.62831,0.879009,1.0,0.939504
2688,Fullmetal Alchemist Brotherhood,"Action,Military,Adventure,Comedy,Drama,Magic,F...",9.25,730784,1194518,9.174036,1.0,0.822714,0.911357
4043,Shingeki no Kyojin,"Action,Military,Mystery,SuperPower,Drama,Fanta...",8.49,936784,1340641,8.44995,0.839465,0.923439,0.881452
3313,Steins;Gate,"Thriller,SciFi",9.14,561405,1005781,9.046752,0.97178,0.692614,0.832197
5016,One Punch Man,"Action,SciFi,Comedy,Parody,SuperPower,Supernat...",8.73,687965,1015163,8.667603,0.88772,0.699081,0.793401


In [18]:
# Remove exact duplicate rows and renumber the rows 0..n-1 so that row position
# equals rank by score. drop=True discards the old index directly instead of
# materialising it as an 'index' column that then has to be dropped by name.
data_scored = data_scored.drop_duplicates().reset_index(drop=True)
data_scored.head()

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg,norm_weighted_avg,norm_members,score
0,Death Note,"Mystery,Police,Psychological,Supernatural,Thri...",8.67,1006242,1451708,8.62831,0.879009,1.0,0.939504
1,Fullmetal Alchemist Brotherhood,"Action,Military,Adventure,Comedy,Drama,Magic,F...",9.25,730784,1194518,9.174036,1.0,0.822714,0.911357
2,Shingeki no Kyojin,"Action,Military,Mystery,SuperPower,Drama,Fanta...",8.49,936784,1340641,8.44995,0.839465,0.923439,0.881452
3,Steins;Gate,"Thriller,SciFi",9.14,561405,1005781,9.046752,0.97178,0.692614,0.832197
4,One Punch Man,"Action,SciFi,Comedy,Parody,SuperPower,Supernat...",8.73,687965,1015163,8.667603,0.88772,0.699081,0.793401


In [19]:
#splitting the genre column into lists
def split_columns(x):
    """Split a comma-separated string into a list of its parts.

    No whitespace is trimmed and empty parts are kept, so an empty string
    yields ``['']``.
    """
    delimiter = ','
    parts = x.split(delimiter)
    return parts

In [20]:
# Turn each comma-separated genre string into a list of genre names.
data_scored['genres'] = data_scored['genres'].map(split_columns)

In [21]:
# Map each row position in data_scored (0, 1, 2, ...) to that row's genre list.
anime_map = dict(enumerate(data_scored['genres']))

In [22]:
#function to get recommendation based on genre
def get_genre_recommendations(genre, top_n=10):
    """Return the titles of the first ``top_n`` anime tagged with ``genre``.

    Reads the notebook-level ``data_scored`` frame and scans its rows in their
    current order. Earlier cells sort that frame by descending ``score``, so
    the first matches are the best-scoring ones.

    Genres are read straight from ``data_scored['genres']`` rather than from a
    separately built lookup, so the result cannot go out of sync with the frame.

    Parameters
    ----------
    genre : str
        Genre name to look for; matched with ``in`` against each row's genres.
    top_n : int, default 10
        Maximum number of titles to return. Values below zero are treated as 0.

    Returns
    -------
    pandas.Series
        Titles of the matching rows, in frame order. Empty if nothing matches.
    """
    matching_positions = [
        position
        for position, genres in enumerate(data_scored['genres'])
        if genre in genres
    ]

    # Keep only the leading matches; positions are row offsets, hence .iloc.
    selected = matching_positions[:max(top_n, 0)]
    return data_scored['title'].iloc[selected]

In [24]:
# Example: the leading titles tagged with the 'Drama' genre.
get_genre_recommendations(genre='Drama')

1       Fullmetal Alchemist Brotherhood
2                    Shingeki no Kyojin
5        Code Geass Hangyaku no Lelouch
7                           Angel Beats
9                        Kimi no Na wa.
10    Code Geass Hangyaku no Lelouch R2
13                          Tokyo Ghoul
15        Sen to Chihiro no Kamikakushi
16              Shigatsu wa Kimi no Uso
17                         Cowboy Bebop
Name: title, dtype: object

In [26]:
# Save the scored table to genre_data.csv without the row index.
# NOTE(review): 'genres' holds Python lists at this point, so they are presumably
# written as their text representation — confirm that readers of the file parse them.
data_scored.to_csv(path_or_buf='genre_data.csv', index=False)

In [25]:
# Preview the final table; 'genres' now holds lists instead of comma-separated strings.
data_scored.head(n=5)

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg,norm_weighted_avg,norm_members,score
0,Death Note,"[Mystery, Police, Psychological, Supernatural,...",8.67,1006242,1451708,8.62831,0.879009,1.0,0.939504
1,Fullmetal Alchemist Brotherhood,"[Action, Military, Adventure, Comedy, Drama, M...",9.25,730784,1194518,9.174036,1.0,0.822714,0.911357
2,Shingeki no Kyojin,"[Action, Military, Mystery, SuperPower, Drama,...",8.49,936784,1340641,8.44995,0.839465,0.923439,0.881452
3,Steins;Gate,"[Thriller, SciFi]",9.14,561405,1005781,9.046752,0.97178,0.692614,0.832197
4,One Punch Man,"[Action, SciFi, Comedy, Parody, SuperPower, Su...",8.73,687965,1015163,8.667603,0.88772,0.699081,0.793401
