In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os
from utils import load_config, tabulacion, apply_format
import numpy as np
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display
import plotly.graph_objects as go

In [2]:
# Directory the notebook kernel is running from.
CURRENT = os.getcwd()

# Parent of the working directory; data file paths are joined onto it below.
ROOT = os.path.dirname(CURRENT)

In [3]:
# Load the notebook configuration; later cells read data file paths from config_f["data"].
config_f=load_config("config.yaml")

In [4]:
# Read the main table and the two catalogues. Each path comes from the
# "data" section of the config and is resolved against ROOT.
datos, genres, themes = (
    pd.read_csv(os.path.join(ROOT, config_f["data"][config_key]))
    for config_key in ("main", "cat_genres", "cat_themes")
)

In [5]:
def _tidy_list_text(column):
    """Clean a text column for display.

    Underscores become spaces, then square brackets and single quotes are
    removed, so a value like "['slice_of_life', 'school']" turns into
    "slice of life, school".
    """
    return (
        column
        .str.replace("_", " ", regex=False)
        # Raw string: "\[" and "\]" in a normal string are invalid escape
        # sequences and trigger a SyntaxWarning on recent Python versions.
        .str.replace(r"[\[\]']", "", regex=True)
    )


# Subset of columns used by the tables and charts below, with the text
# columns cleaned up for presentation.
datos_display = (
    datos[['ranking', 'score', 'title', 'studio', 'themes', 'genres',
           'demographics', 'number_episodes', 'members', 'year']]
    .assign(
        themes=lambda df_: _tidy_list_text(df_.themes),
        genres=lambda df_: _tidy_list_text(df_.genres),
        studio=lambda df_: (
            df_.studio
            .str.replace("_", " ", regex=False)
            .str.capitalize()
        ),
    )
)

### Top Animes con mejor calificación promedio:

<p float="left">
  <img src="images/FMa.jpe" width="25%" height="200" />
  <img src="images/oshinoko1.jpg" width="35%" height="200"/> 
  <img src="images/450_1000.jpeg" width="30%" height="200" />
</p>


In [6]:
# First ten rows of datos_display, formatted for the table.
# NOTE(review): this relies on the source file already being ordered by ranking - confirm.
top_10_animes = apply_format(datos_display.iloc[:10])

# Header labels passed to the table helper.
columnas_top1 = [
    'ranking',
    'Score promedio ',
    'Título',
    'Estudio',
    'Temática',
    'Público objetivo',
]

tabulacion(
    top_10_animes,
    columnas_top1,
    top_margin=0,
    bottom_margin=0,
    left_margin=0,
    right_margin=0,
    w=600,
    h=445,
)

<br>
<br>
<br>


### Top animes populares

<p float="left">
  <img src="images/450_1000.jpeg" width="25%" height="200" />
  <img src="images/desnoto.jpeg" width="35%" height="200"/> 
  <img src="images/FMa.jpe" width="30%" height="200" />
</p>


In [7]:
# Ten rows with the highest member count, formatted for the table.
most_popular_rows = datos_display.sort_values(by="members", ascending=False).head(10)
most_popular_anime = apply_format(most_popular_rows)

# Header labels passed to the table helper.
columnas_toppop = [
    'ranking',
    'Score promedio ',
    'Título',
    'Estudio',
    'Temática',
    'Público objetivo',
    'Popularidad',
]

tabulacion(
    most_popular_anime,
    columnas_toppop,
    top_margin=0,
    bottom_margin=0,
    left_margin=0,
    right_margin=0,
    w=650,
    h=430,
)

<br>
<br>
<br>


### Top animes más largos

<p float="left">
  <img src="images/doraemon.jpg" width="80%" height="350" />
</p>


In [8]:
# Ten rows with the most episodes, formatted for the table.
longest_rows = datos_display.sort_values(by="number_episodes", ascending=False).head(10)
top_longest_animes = apply_format(longest_rows)

# Header labels passed to the table helper.
columnas_topL = [
    'ranking',
    'Score promedio ',
    'Título',
    'Episodios',
    'Año',
]

tabulacion(
    top_longest_animes,
    columnas_topL,
    top_margin=0,
    bottom_margin=0,
    left_margin=0,
    right_margin=0,
    w=600,
    h=220,
)

<br>
<br>
<br>
<br>
<br>


### Quién puntúa más alto:

In [9]:
import plotly.express as px

# Histogram of the anime scores.
# (The unused `px.data.tips()` sample-dataset load was removed: nothing read it.)
fig = px.histogram(
    datos_display,
    x="score",
    title='Distribucion de los scores de animes',
    labels={'score': 'Score promedio'},  # axis label shown instead of the column name
    opacity=0.8,
    log_y=False,  # linear bar heights; set True for a log scale
    color_discrete_sequence=['indianred'],  # bar colour
)

# Fixed figure size and margins.
fig.update_layout(
    autosize=False,
    width=800,
    height=500,
    margin=dict(l=50, r=50, b=100, t=100, pad=5),
)
fig.show()

<br>
<br>
<br>


In [10]:
# Mean members and mean score per studio, keeping the ten studios with the
# highest mean member count. Index labels are made human readable
# (underscores to spaces, title case).
top_10_popular_studios = (
    datos[["studio", "members", "score"]]
    .groupby("studio")
    .mean()
    .sort_values(by="members", ascending=False)
    .head(10)
    .rename(index=lambda studio_name: studio_name.replace("_", " ").title())
)

In [11]:
# Chart data: studio labels, mean member count and mean score.
studios = top_10_popular_studios.index.tolist()
members = top_10_popular_studios['members'].tolist()
scores = top_10_popular_studios['score'].tolist()

# Bars: popularity (mean members) on the primary y axis.
trace1 = go.Bar(
    x=studios,
    y=members,
    name='Popularidad',
    marker=dict(color='rgba(55, 83, 109, 0.7)'),
)

# Markers: mean score on the secondary y axis, labelled with the value
# rounded to two decimals.
trace2 = go.Scatter(
    x=studios,
    y=scores,
    name='Score',
    mode='markers+text',
    yaxis='y2',
    textposition='top center',
    text=[str(round(i, 2)) for i in scores],
)

# Layout with a second y axis ('yaxis2') overlaid on the right for the score.
# Axis title colours are set through `title.font`; the deprecated `titlefont`
# attribute is not accepted by recent plotly releases.
layout = go.Layout(
    title='Estudios más populares',
    yaxis=dict(
        title=dict(
            text='Popularidad',
            font=dict(color='rgba(55, 83, 109, 0.7)'),
        ),
        tickfont=dict(color='rgba(55, 83, 109, 0.7)'),
    ),
    yaxis2=dict(
        title=dict(
            text='Score',
            font=dict(color='rgba(245, 39, 39, 0.8)'),
        ),
        overlaying='y',
        side='right',
        showgrid=False,
        tickfont=dict(color='rgba(245, 39, 39, 0.8)'),
        range=[0, 10],
    ),
    legend=dict(
        x=1.1,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)',
    ),
    barmode='group',
    bargap=0.15,
    bargroupgap=0.1,
)

fig = go.Figure(data=[trace1, trace2], layout=layout)

# Fixed figure size and margins.
fig.update_layout(
    autosize=False,
    width=800,
    height=500,
    margin=dict(l=50, r=50, b=100, t=100, pad=5),
)

fig.show()

<br>
<br>
<br>


In [12]:
# Total episodes and mean score per studio, sorted by total episodes.
# Eleven rows are taken and the first one is then dropped by position.
# NOTE(review): the dropped top row is presumably a placeholder studio
# (e.g. "not_available") - confirm, and consider dropping it by label.
top_10_more_episodes_studios = (
    datos[["studio", "number_episodes", "score"]]
    .groupby("studio")
    .agg(
        number_episodes=("number_episodes", "sum"),
        score=("score", "mean"),
    )
    .sort_values(by="number_episodes", ascending=False)
    .head(11)
    .rename(index=lambda studio_name: studio_name.replace("_", " ").title())
    .iloc[1:]
)

In [13]:
# Chart data: studio labels, total episodes and mean score.
studios = top_10_more_episodes_studios.index.tolist()
episodes = top_10_more_episodes_studios['number_episodes'].tolist()
scores = top_10_more_episodes_studios['score'].tolist()

# Bars: total number of episodes on the primary y axis.
trace1 = go.Bar(
    x=studios,
    y=episodes,
    name='Episodios',
    marker=dict(color='rgba(55, 83, 109, 0.7)'),
)

# Markers: mean score on the secondary y axis, labelled with the value
# rounded to two decimals.
trace2 = go.Scatter(
    x=studios,
    y=scores,
    name='Score',
    mode='markers+text',
    yaxis='y2',
    textposition='top center',
    text=[str(round(i, 2)) for i in scores],
)

# Layout with a second y axis ('yaxis2') overlaid on the right for the score.
# Axis title colours are set through `title.font`; the deprecated `titlefont`
# attribute is not accepted by recent plotly releases.
layout = go.Layout(
    title='Estudios más productivos',
    yaxis=dict(
        title=dict(
            text='Episodios',
            font=dict(color='rgba(55, 83, 109, 0.7)'),
        ),
        tickfont=dict(color='rgba(55, 83, 109, 0.7)'),
    ),
    yaxis2=dict(
        title=dict(
            text='Score',
            font=dict(color='rgba(245, 39, 39, 0.8)'),
        ),
        overlaying='y',
        side='right',
        showgrid=False,
        tickfont=dict(color='rgba(245, 39, 39, 0.8)'),
        range=[0, 10],
    ),
    legend=dict(
        x=1.1,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)',
    ),
    barmode='group',
    bargap=0.15,
    bargroupgap=0.1,
)

fig = go.Figure(data=[trace1, trace2], layout=layout)

# Fixed figure size and margins.
fig.update_layout(
    autosize=False,
    width=800,
    height=500,
    margin=dict(l=50, r=50, b=100, t=100, pad=5),
)

fig.show()

<br>
<br>
<br>


In [14]:
# Mean members and mean score per demographic group, without the
# "not_available" bucket (ignored if absent), sorted by mean members.
# Index labels are made human readable (underscores to spaces, title case).
top10_popular_demographics = (
    datos[["demographics", "members", "score"]]
    .groupby("demographics")
    .mean()
    .drop(index="not_available", errors="ignore")
    .sort_values(by="members", ascending=False)
    .rename(index=lambda group_name: group_name.replace("_", " ").title())
)

In [15]:
# Chart data: demographic labels, mean member count and mean score.
# `studios` holds demographic labels here; the variable name is kept so the
# notebook namespace stays the same as before.
studios = top10_popular_demographics.index.tolist()
members = top10_popular_demographics['members'].tolist()
scores = top10_popular_demographics['score'].tolist()

# Bars: popularity (mean members) on the primary y axis.
trace1 = go.Bar(
    x=studios,
    y=members,
    name='Popularidad',
    marker=dict(color='rgba(55, 83, 109, 0.7)'),
)

# Markers: mean score on the secondary y axis, labelled with the value
# rounded to two decimals.
trace2 = go.Scatter(
    x=studios,
    y=scores,
    name='Score',
    mode='markers+text',
    yaxis='y2',
    textposition='top center',
    text=[str(round(i, 2)) for i in scores],
)

# Layout with a second y axis ('yaxis2') overlaid on the right for the score.
# Axis title colours are set through `title.font`; the deprecated `titlefont`
# attribute is not accepted by recent plotly releases.
layout = go.Layout(
    title='Poblacion objetivo más popular',
    yaxis=dict(
        # The bars plot `members`, so the axis is labelled 'Popularidad'
        # (it previously read 'Episodios', copied from the episodes chart).
        title=dict(
            text='Popularidad',
            font=dict(color='rgba(55, 83, 109, 0.7)'),
        ),
        tickfont=dict(color='rgba(55, 83, 109, 0.7)'),
    ),
    yaxis2=dict(
        title=dict(
            text='Score',
            font=dict(color='rgba(245, 39, 39, 0.8)'),
        ),
        overlaying='y',
        side='right',
        showgrid=False,
        tickfont=dict(color='rgba(245, 39, 39, 0.8)'),
        range=[0, 10],
    ),
    legend=dict(
        x=1.1,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)',
    ),
    barmode='group',
    bargap=0.15,
    bargroupgap=0.1,
)

fig = go.Figure(data=[trace1, trace2], layout=layout)

# Fixed figure size and margins.
fig.update_layout(
    autosize=False,
    width=800,
    height=500,
    margin=dict(l=50, r=50, b=100, t=100, pad=5),
)

fig.show()