In [1]:
import pandas as pd
import numpy as np
from datetime import datetime as dt

In [2]:
df = pd.read_csv("res/anime.csv")
df.head()

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",Unknown,...,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Trigun,トライガン,TV,26,"Apr 1, 1998 to Sep 30, 1998",Spring 1998,...,50229.0,75651.0,86142.0,49432.0,15376.0,5838.0,1965.0,664.0,316.0,533.0
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),TV,26,"Jul 2, 2002 to Dec 24, 2002",Summer 2002,...,2182.0,4806.0,10128.0,11618.0,5709.0,2920.0,1083.0,353.0,164.0,131.0
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",Beet the Vandel Buster,冒険王ビィト,TV,52,"Sep 30, 2004 to Sep 29, 2005",Fall 2004,...,312.0,529.0,1242.0,1713.0,1068.0,634.0,265.0,83.0,50.0,27.0


## Data Wrangling

In [3]:
pd.set_option('display.max_columns', None)
features = df

In [4]:
#convert list columns into individual sections
def split_list(column):
    global features
    features[column] = features[column].apply(lambda x: [_.strip() for _ in x.split(",")])


split_list("Genres")
split_list("Producers")
split_list("Licensors")
split_list("Studios")

In [5]:
#replace unknowns with np.nan
features.replace(["Unknown", None], np.nan, inplace=True)

In [6]:
#convert strings to datetime objects
def str_to_dt(string):
    string = str(string)
    string_list = [string]
    if "to" in string:
        string_list = string.split("to")
        
    dts = []
    for i in string_list:
        i = i.strip()
        if i=="?" or i=="nan":
            string_dt = np.nan 
        else:
            try:
                string_dt = dt.strptime(i, "%b %d, %Y")
            except:
                try:
                    string_dt = dt.strptime(i, "%b, %Y")
                except:
                    string_dt = dt.strptime(i, "%Y")
        dts.append(string_dt)
    return dts

features["Aired"] = features["Aired"].apply(str_to_dt)

In [7]:
#convert duration into int
def str_to_num(string):
    
    string = str(string)
    if "nan" in string:
        return np.nan
    
    string_lst = string.split(".")
    minute = 0
    hour = 0
    
    for i in string_lst:
        if "min" in i:
            minute += int(i.strip("min "))
        if "hr" in i:
            hour += int(i.strip("hr "))
    
    return minute+(60*hour)

features["Duration"] = features["Duration"].apply(str_to_num)

In [8]:
#simplify Rating
def simplify_rating(rating):
    
    rating = str(rating)
    if "nan" in rating:
        return np.nan
    
    if rating.startswith("R"):
        if rating.startswith("Rx"):
            return "Rx"
        if rating.startswith("R+"):
            return "R+"
        return "R"
    
    if rating.startswith("PG"):
        if "13" in rating:
            return"PG13"
        return "PG"
    
    if rating.startswith("G"):
        return "G"

features["Rating"] = features["Rating"].apply(simplify_rating)

In [9]:
features.head()

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,Producers,Licensors,Studios,Source,Duration,Rating,Ranked,Popularity,Members,Favorites,Watching,Completed,On-Hold,Dropped,Plan to Watch,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"[Action, Adventure, Comedy, Drama, Sci-Fi, Space]",Cowboy Bebop,カウボーイビバップ,TV,26,"[1998-04-03 00:00:00, 1999-04-24 00:00:00]",Spring 1998,[Bandai Visual],"[Funimation, Bandai Entertainment]",[Sunrise],Original,24.0,R,28.0,39,1251960,61971,105808,718161,71513,26678,329800,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"[Action, Drama, Mystery, Sci-Fi, Space]",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,[2001-09-01 00:00:00],,"[Sunrise, Bandai Visual]",[Sony Pictures Entertainment],[Bones],Original,115.0,R,159.0,518,273145,1174,4143,208333,1935,770,57964,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0
2,6,Trigun,8.24,"[Action, Sci-Fi, Adventure, Comedy, Drama, Sho...",Trigun,トライガン,TV,26,"[1998-04-01 00:00:00, 1998-09-30 00:00:00]",Spring 1998,[Victor Entertainment],"[Funimation, Geneon Entertainment USA]",[Madhouse],Manga,24.0,PG13,266.0,201,558913,12944,29113,343492,25465,13925,146918,50229.0,75651.0,86142.0,49432.0,15376.0,5838.0,1965.0,664.0,316.0,533.0
3,7,Witch Hunter Robin,7.27,"[Action, Mystery, Police, Supernatural, Drama,...",Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),TV,26,"[2002-07-02 00:00:00, 2002-12-24 00:00:00]",Summer 2002,"[TV Tokyo, Bandai Visual, Dentsu, Victor Enter...","[Funimation, Bandai Entertainment]",[Sunrise],Original,25.0,PG13,2481.0,1467,94683,587,4300,46165,5121,5378,33719,2182.0,4806.0,10128.0,11618.0,5709.0,2920.0,1083.0,353.0,164.0,131.0
4,8,Bouken Ou Beet,6.98,"[Adventure, Fantasy, Shounen, Supernatural]",Beet the Vandel Buster,冒険王ビィト,TV,52,"[2004-09-30 00:00:00, 2005-09-29 00:00:00]",Fall 2004,"[TV Tokyo, Dentsu]",[Unknown],[Toei Animation],Manga,23.0,PG,3710.0,4369,13224,18,642,7314,766,1108,3394,312.0,529.0,1242.0,1713.0,1068.0,634.0,265.0,83.0,50.0,27.0


In [12]:
df = df.sort_values(by="Members", ascending =False, kind='quicksort', na_position='last', ignore_index=False, key=None)
df.head()

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,Producers,Licensors,Studios,Source,Duration,Rating,Ranked,Popularity,Members,Favorites,Watching,Completed,On-Hold,Dropped,Plan to Watch,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
1393,1535,Death Note,8.63,"[Mystery, Police, Psychological, Supernatural,...",Death Note,デスノート,TV,37,"[2006-10-04 00:00:00, 2007-06-27 00:00:00]",Fall 2006,"[VAP, Konami, Ashi Production, Nippon Televisi...",[VIZ Media],[Madhouse],Manga,23.0,R,60.0,1,2589552,145201,122401,2146116,75054,80834,165147,557406.0,535252.0,415890.0,201522.0,68577.0,28048.0,10462.0,3692.0,2256.0,3586.0
7449,16498,Shingeki no Kyojin,8.48,"[Action, Military, Mystery, Super Power, Drama...",Attack on Titan,進撃の巨人,TV,25,"[2013-04-07 00:00:00, 2013-09-29 00:00:00]",Spring 2013,"[Production I.G, Dentsu, Mainichi Broadcasting...",[Funimation],[Wit Studio],Manga,24.0,R,115.0,2,2531397,129844,140753,2182587,37345,44635,126077,470882.0,514879.0,459113.0,220228.0,70768.0,31141.0,11805.0,4637.0,2707.0,4939.0
3971,5114,Fullmetal Alchemist: Brotherhood,9.19,"[Action, Military, Adventure, Comedy, Drama, M...",Fullmetal Alchemist:Brotherhood,鋼の錬金術師 FULLMETAL ALCHEMIST,TV,64,"[2009-04-05 00:00:00, 2010-07-04 00:00:00]",Spring 2009,"[Aniplex, Square Enix, Mainichi Broadcasting S...","[Funimation, Aniplex of America]",[Bones],Manga,24.0,R,1.0,3,2248456,183914,171871,1644938,75728,32456,323463,714811.0,401507.0,199160.0,70045.0,20210.0,9308.0,3222.0,1536.0,2162.0,16806.0
6614,11757,Sword Art Online,7.25,"[Action, Game, Adventure, Romance, Fantasy]",Sword Art Online,ソードアート・オンライン,TV,25,"[2012-07-08 00:00:00, 2012-12-23 00:00:00]",Summer 2012,"[Aniplex, Genco, DAX Production, ASCII Media W...",[Aniplex of America],[A-1 Pictures],Light novel,23.0,PG13,2584.0,4,2214395,66342,80304,1907261,25632,90661,110537,241049.0,236672.0,305386.0,303813.0,188431.0,124819.0,81155.0,44204.0,25371.0,23472.0
10451,30276,One Punch Man,8.57,"[Action, Sci-Fi, Comedy, Parody, Super Power, ...",One Punch Man,ワンパンマン,TV,12,"[2015-10-05 00:00:00, 2015-12-21 00:00:00]",Fall 2015,"[TV Tokyo, Bandai Visual, Lantis, Asatsu DK, B...",[VIZ Media],[Madhouse],Web manga,24.0,R,81.0,5,2123866,54435,96568,1841220,30271,26755,129052,360187.0,465041.0,403832.0,172181.0,47365.0,17873.0,5706.0,2279.0,1448.0,2733.0


In [15]:
series1 = []
series2 = []
for column in df:
    if "Genres" in str(column):
        sum1 = df.loc[df[column] == 1, 'Members'].sum()
        series1.append(column)
        series2.append(sum1)
df_genres_sum = pd.DataFrame({"Genre":series1, "Sum_Genre":series2})
df_genres_sum

Unnamed: 0,Genre,Sum_Genre
0,Genres,Genres Mystery Members 608673271 dtyp...


In [14]:
import plotly.express as px

fig = px.pie(df_genres_sum, values='Sum_Genre', names='Genre', title='Different Genre popularity')
fig.show()