In [1]:
#importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [18]:
#loading the dataset
anime_data = pd.read_csv('../DATA/Anime_new_cleaned.csv')

In [19]:
anime_data.columns = map(str.lower, anime_data.columns)
anime_data.head(3)

Unnamed: 0,anime_id,title,genres,synopsis,anime_type,producer,studio,rating,scored_by,popularity,members,num_of_episodes,anime_source,aired,link
0,1,Cowboy Bebop,"['Action','Adventure','Comedy','Drama','SciFi'...","In the year 2071, humanity has colonized sever...",TV,['BandaiVisual'],Sunrise,8.81,363889.0,39.0,704490.0,26.0,Original,"Apr 3, 1998 to Apr 24, 1999",https://myanimelist.net/anime/1/Cowboy_Bebop
1,5,Cowboy Bebop Tengoku no Tobira,"['Action','Space','Drama','Mystery','SciFi']","Another day, another bountysuch is the life of...",Movie,"['Sunrise','BandaiVisual']",Bones,8.41,111187.0,475.0,179899.0,1.0,Original,"Sep 1, 2001",https://myanimelist.net/anime/5/Cowboy_Bebop__...
2,6,Trigun,"['Action','SciFi','Adventure','Comedy','Drama'...","Vash the Stampede is the man with a 60,000,000...",TV,['VictorEntertainment'],Madhouse,8.31,197451.0,158.0,372709.0,26.0,Manga,"Apr 1, 1998 to Sep 30, 1998",https://myanimelist.net/anime/6/Trigun


In [20]:
#creating the function to remove the characters
import re
def clean_up(s):
    return re.sub(r"[^\sa-zA-Z0-9\.\,\;\!\-]","",str(s))

In [21]:
anime_data['title'] = anime_data['title'].apply(clean_up)
anime_data['genres'] = anime_data['genres'].apply(clean_up)
anime_data['producer'] = anime_data['producer'].apply(clean_up)
anime_data['studio'] = anime_data['studio'].apply(clean_up)

In [22]:
#accessing a limited number of columns required for weighted score recommendation
anime_data = anime_data[['title','genres','rating','scored_by','members']]
anime_data.head(2)

Unnamed: 0,title,genres,rating,scored_by,members
0,Cowboy Bebop,"Action,Adventure,Comedy,Drama,SciFi,Space",8.81,363889.0,704490.0
1,Cowboy Bebop Tengoku no Tobira,"Action,Space,Drama,Mystery,SciFi",8.41,111187.0,179899.0


In [23]:
anime_data =  anime_data[anime_data['members']>=1000]
anime_data

Unnamed: 0,title,genres,rating,scored_by,members
0,Cowboy Bebop,"Action,Adventure,Comedy,Drama,SciFi,Space",8.81,363889.0,704490.0
1,Cowboy Bebop Tengoku no Tobira,"Action,Space,Drama,Mystery,SciFi",8.41,111187.0,179899.0
2,Trigun,"Action,SciFi,Adventure,Comedy,Drama,Shounen",8.31,197451.0,372709.0
3,Witch Hunter Robin,"Action,Magic,Police,Supernatural,Drama,Mystery",7.34,31875.0,74889.0
4,Bouken Ou Beet,"Adventure,Fantasy,Shounen,Supernatural",7.04,4757.0,11247.0
...,...,...,...,...,...
15811,Mega Man,,6.52,,3263.0
15812,AKo The Versus,,6.49,,2457.0
15813,Magic Knight Rayearth Omake,,6.45,,1730.0
15814,Rilu Rilu Fairilu Yousei no Door,,6.36,,1293.0


In [24]:
anime_data = anime_data.dropna(axis=0)
anime_data.head()

Unnamed: 0,title,genres,rating,scored_by,members
0,Cowboy Bebop,"Action,Adventure,Comedy,Drama,SciFi,Space",8.81,363889.0,704490.0
1,Cowboy Bebop Tengoku no Tobira,"Action,Space,Drama,Mystery,SciFi",8.41,111187.0,179899.0
2,Trigun,"Action,SciFi,Adventure,Comedy,Drama,Shounen",8.31,197451.0,372709.0
3,Witch Hunter Robin,"Action,Magic,Police,Supernatural,Drama,Mystery",7.34,31875.0,74889.0
4,Bouken Ou Beet,"Adventure,Fantasy,Shounen,Supernatural",7.04,4757.0,11247.0


In [25]:
anime_data = anime_data.astype({'scored_by':'int64','members':'int64'})
anime_data.head()

Unnamed: 0,title,genres,rating,scored_by,members
0,Cowboy Bebop,"Action,Adventure,Comedy,Drama,SciFi,Space",8.81,363889,704490
1,Cowboy Bebop Tengoku no Tobira,"Action,Space,Drama,Mystery,SciFi",8.41,111187,179899
2,Trigun,"Action,SciFi,Adventure,Comedy,Drama,Shounen",8.31,197451,372709
3,Witch Hunter Robin,"Action,Magic,Police,Supernatural,Drama,Mystery",7.34,31875,74889
4,Bouken Ou Beet,"Adventure,Fantasy,Shounen,Supernatural",7.04,4757,11247


In [26]:
#finding the weighted average for each anime
V = anime_data['scored_by']                       #number of votes for each anime
R = anime_data['rating']                     #average voting rating for each anime
C = anime_data['rating'].mean()              #mean vote across the whole report
m = anime_data['scored_by'].quantile(0.80)        #minimum votes requires to be among the top 

anime_data['weighted_avg'] = ((R*V)+(C*m))/(V+m)     #used by imdb to calculate weighted average of movies

In [27]:
#recommending the top 10 anime with the highest weighted average
wavg = anime_data.sort_values('weighted_avg', ascending = False)
wavg.head()

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg
3154,Fullmetal Alchemist Brotherhood,"Action,Military,Adventure,Comedy,Drama,Magic,F...",9.25,730784,1194518,9.173719
9137,Kimi no Na wa.,"Supernatural,Drama,Romance,School",9.19,467437,724490,9.075973
4376,Steins;Gate,"Thriller,SciFi",9.14,561405,1005781,9.046377
5000,Hunter x Hunter 2011,"Action,Adventure,SuperPower,Shounen",9.11,400879,716409,8.98285
2875,Clannad After Story,"SliceofLife,Comedy,Supernatural,Drama,Romance",9.01,362434,605864,8.876688


In [28]:
#sorting data according to their popularity
anime_members = anime_data.sort_values('members',ascending = False)
anime_members.head(5)

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg
1205,Death Note,"Mystery,Police,Psychological,Supernatural,Thri...",8.67,1006242,1451708,8.62816
5600,Shingeki no Kyojin,"Action,Military,Mystery,SuperPower,Drama,Fanta...",8.49,936784,1340641,8.449819
5086,Sword Art Online,"Action,Adventure,Fantasy,Game,Romance",7.64,913806,1297309,7.621479
3154,Fullmetal Alchemist Brotherhood,"Action,Military,Adventure,Comedy,Drama,Magic,F...",9.25,730784,1194518,9.173719
8315,One Punch Man,"Action,SciFi,Comedy,Parody,SuperPower,Supernat...",8.73,687965,1015163,8.667378


############################

In [29]:
#recommending movies based on weighted average and popularity
#normalizing the weighted average and popularity column

from sklearn import preprocessing

min_max_scaler = preprocessing.MinMaxScaler()
data_scaled = min_max_scaler.fit_transform(anime_data[['weighted_avg','members']])
data_norm = pd.DataFrame(data_scaled, columns = ['weighted_avg','members'])
data_norm.head()

Unnamed: 0,weighted_avg,members
0,0.892533,0.484928
1,0.770526,0.123318
2,0.773987,0.256225
3,0.553953,0.050932
4,0.508125,0.007063


In [30]:
#creating a score value that splits scaled weighted_avg and popularity in half
anime_data[['norm_weighted_avg','norm_members']] = data_norm
anime_data['score'] = anime_data['norm_weighted_avg']*0.5 + anime_data['norm_members']*0.5
data_scored = anime_data.sort_values(['score'], ascending=False)
data_scored.head()

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg,norm_weighted_avg,norm_members,score
1195,Pokemon Crystal Raikou Ikazuchi no Densetsu,"Action,Adventure,Comedy,Fantasy,Kids",6.99,9228,16725,6.956946,0.878782,1.0,0.939391
2714,Chis Sweet Home,"Comedy,SliceofLife",7.79,16557,43383,7.281435,1.0,0.822714,0.911357
4071,Toaru Kagaku no Railgun Motto Marutto Railgun,Comedy,6.94,14619,30085,6.942998,0.839156,0.923439,0.881298
3340,Kiniro no Corda Secondo Passo,"Harem,Music,School,Shoujo",7.23,15133,26860,7.052284,0.971706,0.692614,0.83216
1227,Yamato Nadeshiko Shichihenge,"Comedy,Shoujo",7.86,56643,106478,7.579667,0.898978,0.677375,0.788176


In [31]:
#showing the normalized features and the new score  feature
data_scored[['title','norm_weighted_avg','norm_members','score']].head()

Unnamed: 0,title,norm_weighted_avg,norm_members,score
1195,Pokemon Crystal Raikou Ikazuchi no Densetsu,0.878782,1.0,0.939391
2714,Chis Sweet Home,1.0,0.822714,0.911357
4071,Toaru Kagaku no Railgun Motto Marutto Railgun,0.839156,0.923439,0.881298
3340,Kiniro no Corda Secondo Passo,0.971706,0.692614,0.83216
1227,Yamato Nadeshiko Shichihenge,0.898978,0.677375,0.788176


In [32]:
data_scored.head()

Unnamed: 0,title,genres,rating,scored_by,members,weighted_avg,norm_weighted_avg,norm_members,score
1195,Pokemon Crystal Raikou Ikazuchi no Densetsu,"Action,Adventure,Comedy,Fantasy,Kids",6.99,9228,16725,6.956946,0.878782,1.0,0.939391
2714,Chis Sweet Home,"Comedy,SliceofLife",7.79,16557,43383,7.281435,1.0,0.822714,0.911357
4071,Toaru Kagaku no Railgun Motto Marutto Railgun,Comedy,6.94,14619,30085,6.942998,0.839156,0.923439,0.881298
3340,Kiniro no Corda Secondo Passo,"Harem,Music,School,Shoujo",7.23,15133,26860,7.052284,0.971706,0.692614,0.83216
1227,Yamato Nadeshiko Shichihenge,"Comedy,Shoujo",7.86,56643,106478,7.579667,0.898978,0.677375,0.788176
