In [1]:
import numpy as np 
import pandas as pd

In [2]:
anime = pd.read_csv('./data/anime.csv')
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [4]:
anime['type'].unique()

array(['Movie', 'TV', 'OVA', 'Special', 'Music', 'ONA', nan], dtype=object)

In [7]:
round(anime['members'].describe(), 2)

count      12294.00
mean       18071.34
std        54820.68
min            5.00
25%          225.00
50%         1550.00
75%         9437.00
max      1013917.00
Name: members, dtype: float64

In [5]:
anime_tv = anime[anime['type']=='TV']
anime_tv.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
5,32935,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351


In [4]:
anime_tv['rating'].describe()

count    3671.000000
mean        6.902299
std         0.863526
min         2.670000
25%         6.440000
50%         6.940000
75%         7.460000
max         9.600000
Name: rating, dtype: float64

In [5]:
anime_tv.isnull().sum()

anime_id      0
name          0
genre        10
type          0
episodes      0
rating      116
members       0
dtype: int64

In [6]:
anime_tv_drop = anime_tv.dropna()
anime_tv_drop.isnull().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

## weighted rating

In [7]:
m = anime_tv_drop['members'].quantile(0.75)
m

46041.25

In [8]:
C = anime_tv_drop['rating'].mean()
C

6.903320610687023

In [9]:
def weighted_rating(df, m, C):
    term = df['members'] / (m + df['members'])
    return df['rating'] * term + (1 - term) * C

In [10]:
anime_weighted_rating = anime_tv_drop.copy()
anime_weighted_rating['weighted_rating'] = anime_weighted_rating.apply(weighted_rating, axis=1, args=(m, C))
anime_weighted_rating.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,weighted_rating
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665,9.130783
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262,8.576002
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572,9.024977
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266,8.633408
5,32935,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351,8.407922


In [11]:
anime_df = anime_weighted_rating.loc[:, ['name', 'genre', 'weighted_rating']]
anime_df.head()

Unnamed: 0,name,genre,weighted_rating
1,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",9.130783
2,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",8.576002
3,Steins;Gate,"Sci-Fi, Thriller",9.024977
4,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",8.633408
5,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",8.407922


In [12]:
anime_df[anime_df['genre'].str.contains('Romance')].sort_values('weighted_rating', ascending=False)['name'][:10].tolist()

['Clannad: After Story',
 'Shigatsu wa Kimi no Uso',
 'Monogatari Series: Second Season',
 'Toradora!',
 'Bakemonogatari',
 'Bakuman. 3rd Season',
 'Ouran Koukou Host Club',
 'Sakurasou no Pet na Kanojo',
 'Nana',
 'Katanagatari']

In [13]:
def genre_top10(genre):
    return anime_df[anime_df['genre'].str.contains(genre)].sort_values('weighted_rating', ascending=False)['name'][:10].tolist()

In [14]:
for i in genre_top10('Action'): print(i)

Fullmetal Alchemist: Brotherhood
Hunter x Hunter (2011)
Code Geass: Hangyaku no Lelouch R2
Gintama
Code Geass: Hangyaku no Lelouch
One Punch Man
Cowboy Bebop
Tengen Toppa Gurren Lagann
Gintama&#039;
Gintama°


In [15]:
genre_set = set()
for genre in anime_df['genre'].unique():
    for g_sp in genre.split(','):
        genre_set.add(g_sp)
        
genre_set

{' Adventure',
 ' Cars',
 ' Comedy',
 ' Dementia',
 ' Demons',
 ' Drama',
 ' Ecchi',
 ' Fantasy',
 ' Game',
 ' Harem',
 ' Historical',
 ' Horror',
 ' Josei',
 ' Kids',
 ' Magic',
 ' Martial Arts',
 ' Mecha',
 ' Military',
 ' Music',
 ' Mystery',
 ' Parody',
 ' Police',
 ' Psychological',
 ' Romance',
 ' Samurai',
 ' School',
 ' Sci-Fi',
 ' Seinen',
 ' Shoujo',
 ' Shoujo Ai',
 ' Shounen',
 ' Shounen Ai',
 ' Slice of Life',
 ' Space',
 ' Sports',
 ' Super Power',
 ' Supernatural',
 ' Thriller',
 ' Vampire',
 'Action',
 'Adventure',
 'Cars',
 'Comedy',
 'Dementia',
 'Demons',
 'Drama',
 'Ecchi',
 'Fantasy',
 'Game',
 'Harem',
 'Historical',
 'Horror',
 'Josei',
 'Kids',
 'Magic',
 'Martial Arts',
 'Mecha',
 'Military',
 'Music',
 'Mystery',
 'Parody',
 'Psychological',
 'Romance',
 'School',
 'Sci-Fi',
 'Seinen',
 'Shoujo',
 'Shounen',
 'Slice of Life',
 'Sports',
 'Supernatural'}

In [16]:
genres = ['Action', 'Comedy', 'Romance', 'Drama', 'Fantasy', 'Horror']

## pickle

In [58]:
import pickle

# with open('genres_df.pickle', 'wb') as f:
#     pickle.dump(anime_df, f)