# Dashboard prototype charts

This notebook explores the _Anime_ dataset scraped from the internet in order to gain some insights and to identify the most relevant information to display in a future dashboard.

First of all, we import the required libraries.

In [1]:
import os
import re
import pandas as pd
import matplotlib.pyplot as plt

from ast import literal_eval

# Relative paths
# File name assigned by hand (presumably this notebook's own file name).
__file__ = 'dash_proto.ipynb'
# Directory part of the absolute path built from that file name; a relative
# name is resolved against the current working directory.
CURRENT = os.path.split(os.path.abspath(__file__))[0]
# Parent directory of CURRENT.
ROOT = os.path.split(CURRENT)[0]

An initial inspection of the dataset

In [5]:
# Location of the cleaned dataset, expressed relative to ROOT.
DATA_PATH = os.path.join(ROOT, 'data/clean/anime_data_clean.csv')

# Read the CSV into a DataFrame and preview its first five rows.
anime = pd.read_csv(filepath_or_buffer=DATA_PATH)
anime.head(n=5)

Unnamed: 0,ranking,score,title,emission_date,url,studio,themes,genres,demographics,emission_type,...,Fantasy,Horror,Mystery,Not_available.1,Romance,Sci-Fi,Slice of Life,Sports,Supernatural,Suspense
0,1,9.13,Shingeki no Kyojin: The Final Season - Kankets...,Mar 2023 - 2023,https://myanimelist.net/anime/51535/Shingeki_n...,MAPPA,"['Gore', 'Military', 'Survival']","['Action', 'Drama', 'Suspense']",Shounen,Special,...,0,0,0,0,0,0,0,0,0,1
1,2,9.11,Fullmetal Alchemist: Brotherhood,Apr 2009 - Jul 2010,https://myanimelist.net/anime/5114/Fullmetal_A...,Bones,['Military'],"['Action', 'Adventure', 'Drama', 'Fantasy']",Shounen,TV,...,1,0,0,0,0,0,0,0,0,0
2,3,9.08,Bleach: Sennen Kessen-hen,Oct 2022 - Dec 2022,https://myanimelist.net/anime/41467/Bleach__Se...,Pierrot,['Not_available'],"['Action', 'Adventure', 'Fantasy']",Shounen,TV,...,1,0,0,0,0,0,0,0,0,0
3,4,9.08,Steins;Gate,Apr 2011 - Sep 2011,https://myanimelist.net/anime/9253/Steins_Gate,White Fox,"['Psychological', 'Time Travel']","['Drama', 'Sci-Fi', 'Suspense']",,TV,...,0,0,0,0,0,1,0,0,0,1
4,5,9.07,Gintama°,Apr 2015 - Mar 2016,https://myanimelist.net/anime/28977/Gintama°,Bandai Namco Pictures,"['Gag Humor', 'Historical', 'Parody', 'Samurai']","['Action', 'Comedy', 'Sci-Fi']",Shounen,TV,...,0,0,0,0,0,1,0,0,0,0


Global string formatting functions

In [6]:
def global_format(x):
    """Normalise punctuation in a text label.

    Every run of ``°``, ``.`` or ``-`` characters is collapsed into a single
    underscore; afterwards every ``'s`` occurrence is removed.

    Parameters
    ----------
    x : str
        Text to clean.

    Returns
    -------
    str
        The cleaned text.
    """
    # (pattern, replacement) pairs, applied in this order.
    substitutions = (
        (r"[°.-]+", '_'),
        ("'s", ''),
    )
    for pattern, replacement in substitutions:
        x = re.sub(pattern, replacement, x)
    return x

# Format single string data in cell
def format_single_str(cel_):
    """Convert the text of one cell to snake case.

    Leading and trailing whitespace is trimmed, every remaining space is
    replaced by an underscore and the result is lower-cased.

    Parameters
    ----------
    cel_ : str
        Cell content.

    Returns
    -------
    str
        The snake-case version of ``cel_``.
    """
    # str.replace(' ', '_') gives the same result as splitting on ' ' and
    # joining with '_' (consecutive spaces yield consecutive underscores).
    return cel_.strip().replace(' ', '_').lower()

# Format list like data in cell
def format_list_str(cel_):
    """Convert a list-like cell into a list of snake-case labels.

    Parameters
    ----------
    cel_ : str or list or tuple
        Either the string representation of a Python list (for example
        ``"['Gag Humor', 'Parody']"``) or an already parsed sequence of
        strings.

    Returns
    -------
    list of str
        One entry per element: lower-cased, spaces replaced by underscores,
        then passed through ``global_format``.
    """
    # Parse only textual cells. Already-parsed sequences are used as they are,
    # so applying this function again to a transformed column does not fail
    # inside ``literal_eval``.
    values = literal_eval(cel_) if isinstance(cel_, str) else cel_
    return [global_format(val_.replace(' ', '_').lower()) for val_ in values]

Catalog functions

In [None]:
def build_catalog(df_, columns):
    """Collect the distinct values found in the given columns.

    NOTE(review): the original body was empty and returned an undefined
    ``catalog`` name; the mapping built here is a proposed implementation --
    confirm that it matches what the dashboard needs.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Dataset to inspect.
    columns : str or iterable of str
        Column name(s) to catalogue.

    Returns
    -------
    dict
        Maps each column name to the sorted list of its distinct non-null
        values. Cells that hold lists contribute every element of the list.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(columns, str):
        columns = [columns]

    catalog = {}
    for col_ in columns:
        # explode() turns list-valued cells into one row per element and
        # leaves scalar cells unchanged; empty lists become NaN and are dropped.
        values = df_[col_].explode().dropna()
        catalog[col_] = sorted(values.unique())
    return catalog

The one-hot encoded genre columns start after column 13. We keep only the columns that precede them and apply some convenient transformations for catalog building and further analysis.

In [4]:
# Number of leading descriptive columns to keep; the columns after them are
# the encoded ones and are ignored here.
N_BASE_COLUMNS = 13


def _clean_label_column(series):
    """Fill missing values and convert a single-string column to snake case."""
    return (
        series
        # Handle nan's
        .fillna(value='not_available')
        # Replace whitespace with '_' in column
        .apply(format_single_str)
        # Remove abnormal characters in column
        .apply(global_format)
    )


def _clean_list_column(series):
    """Fill missing values and convert a list-like column to snake-case lists."""
    return (
        series
        # Handle nan's
        .fillna(value="['not_available']")
        # Parse each cell and format every entry of the resulting list
        .apply(format_list_str)
    )


# Dataset column names format
anime.columns = ['_'.join(col_.split(sep=' ')).lower() for col_ in anime.columns]

# Convenient dataset content transformations
anime = (
    anime

    # Ignore encoded columns
    .filter(items=anime.columns[0:N_BASE_COLUMNS])

    # Transform existing columns
    .assign(
        # STUDIO column in snake case
        studio=lambda df_: _clean_label_column(df_.studio),
        # THEMES column in snake case
        themes=lambda df_: _clean_list_column(df_.themes),
        # GENRES column in snake case
        genres=lambda df_: _clean_list_column(df_.genres),
        # DEMOGRAPHICS column in snake case
        demographics=lambda df_: _clean_label_column(df_.demographics),
        # EMISSION_TYPE column in snake case
        emission_type=lambda df_: _clean_label_column(df_.emission_type),
        # FIRST_EMISSION as datetime
        first_emission=lambda df_: pd.to_datetime(df_.first_emission),
    )

    # Add computed columns
    .assign(
        # Extract year from emission date
        year=lambda df_: df_.first_emission.dt.year,
        # Extract month from emission date
        month=lambda df_: df_.first_emission.dt.month,
    )
)

# Preview the transformed dataset
anime.head()

Unnamed: 0,ranking,score,title,emission_date,url,studio,themes,genres,demographics,emission_type,number_episode,members,first_emission,year,month
0,1,9.13,Shingeki no Kyojin: The Final Season - Kankets...,Mar 2023 - 2023,https://myanimelist.net/anime/51535/Shingeki_n...,mappa,"[gore, military, survival]","[action, drama, suspense]",shounen,special,2.0,372149,2023-03-01,2023.0,3.0
1,2,9.11,Fullmetal Alchemist: Brotherhood,Apr 2009 - Jul 2010,https://myanimelist.net/anime/5114/Fullmetal_A...,bones,[military],"[action, adventure, drama, fantasy]",shounen,tv,64.0,3120633,2009-04-01,2009.0,4.0
2,3,9.08,Bleach: Sennen Kessen-hen,Oct 2022 - Dec 2022,https://myanimelist.net/anime/41467/Bleach__Se...,pierrot,[not_available],"[action, adventure, fantasy]",shounen,tv,13.0,411507,2022-10-01,2022.0,10.0
3,4,9.08,Steins;Gate,Apr 2011 - Sep 2011,https://myanimelist.net/anime/9253/Steins_Gate,white_fox,"[psychological, time_travel]","[drama, sci_fi, suspense]",not_available,tv,24.0,2401772,2011-04-01,2011.0,4.0
4,5,9.07,Gintama°,Apr 2015 - Mar 2016,https://myanimelist.net/anime/28977/Gintama°,bandai_namco_pictures,"[gag_humor, historical, parody, samurai]","[action, comedy, sci_fi]",shounen,tv,51.0,584278,2015-04-01,2015.0,4.0
