## **Barplot**

### **Librerías**

In [1]:
import pandas as pd
import numpy as np

import polars as pl

In [2]:
import plotly.express as px
import plotly.graph_objects as go

In [10]:
import sys
# Extend the module search path with the directory three levels up
# (presumably the project root - confirm) so that the `Utils` package
# imported in the following cell can be resolved.
sys.path.append('../../../')

In [11]:
from Utils.functions_EDA import resumen_dataframe

## **Data**

In [3]:
# Raw penguins dataset, read into a polars DataFrame from a relative path.
penguins_csv = '../../../Data/Raw/penguins.csv'
df = pl.read_csv(penguins_csv)

In [12]:
# Peek at one randomly chosen row (n=1 is what a bare `sample()` draws).
df.sample(n=1)

studyName,Sample Number,Species,Region,Island,Stage,Individual ID,Clutch Completion,Date Egg,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Sex,Delta 15 N (o/oo),Delta 13 C (o/oo),Comments
str,i64,str,str,str,str,str,str,str,f64,f64,i64,i64,str,f64,f64,str
"""PAL0809""",84,"""Adelie Penguin (Pygoscelis ade…","""Anvers""","""Torgersen""","""Adult, 1 Egg Stage""","""N40A2""","""Yes""","""11/7/08""",35.1,19.4,193,4200,"""MALE""",8.90002,-26.46254,


In [20]:
# Swap every space in the column headers for an underscore
# (e.g. 'Sample Number' -> 'Sample_Number').
df.columns = [header.replace(' ', '_') for header in df.columns]

In [92]:
# The polars frame is converted to pandas before being handed to the
# project helper; its result (a per-column summary table) is displayed.
df_pandas = df.to_pandas()
resumen_dataframe(df_pandas)

Unnamed: 0,Variable,Tipo_Dato,Registros_Esperados,Valores_Unicos,Valores_Nulos,%Valores_Nulos
0,studyName,object,344,3,0,0.0
1,Sample_Number,int64,344,152,0,0.0
2,Species,object,344,3,0,0.0
3,Region,object,344,1,0,0.0
4,Island,object,344,3,0,0.0
5,Stage,object,344,1,0,0.0
6,Individual_ID,object,344,190,0,0.0
7,Clutch_Completion,object,344,2,0,0.0
8,Date_Egg,object,344,50,0,0.0
9,Culmen_Length_(mm),float64,344,165,2,0.58


### **Limpieza**

In [155]:
# Recoding table for the raw `Sex` labels; '.' is mapped to a placeholder.
sex_labels = {
    'MALE': 'Male',
    'FEMALE': 'Female',
    '.': '<--->'
}

# Apply the recoding, then give nulls the same '<--->' placeholder.
sex_clean = pl.col('Sex').replace(sex_labels).fill_null('<--->')

# Text found between the parentheses of `Species`.
species_name = (
    pl.col('Species')
    .str.extract(r'\((.*?)\)', 1)
    .alias('Species_Name')
)

# `Species` with the parenthesised part (and the whitespace before it) removed.
species_abb = (
    pl.col('Species')
    .str.replace(r"\s*\(.*\)", '')
    .alias('Species_Abb')
)

# `Sex` is overwritten in place; the two species columns are appended.
df = df.with_columns([sex_clean, species_name, species_abb])

### **Visualizaciones**

#### **Barplot - Basic**

In [212]:
# Count the rows that fall under each `Species_Abb` value ...
species_counts = df.group_by(
    pl.col('Species_Abb'), maintain_order=False
).agg(pl.len().alias('Count'))

# ... and list the most frequent species first.
df_group_species = species_counts.sort('Count', descending=True)

In [214]:
# Display the per-species counts as a pandas DataFrame.
df_group_species.to_pandas()

Unnamed: 0,Species_Abb,Count
0,Adelie Penguin,152
1,Gentoo penguin,124
2,Chinstrap penguin,68


In [215]:
# Minimal bar chart: one bar per species, bar height = number of rows.
bar_data = df_group_species.to_pandas()

fig = px.bar(
    bar_data,
    x='Species_Abb',
    y='Count',
    color='Species_Abb',
)

# The x axis already names each species, so the colour legend is hidden.
fig.update_layout(showlegend=False)

fig.show()

#### **Barplot - Layout**

In [228]:
# Styled bar chart of the number of penguins per species.
#
# Axis titles use the nested `title=dict(text=..., font=...)` form. The
# flat `titlefont` attribute is deprecated and is rejected by plotly.py 6+,
# so the nested form keeps this cell runnable across plotly versions.

# Fixed colour for every species label present in `Species_Abb`.
species_colors = {
    'Adelie Penguin': '#0f084b',
    'Chinstrap penguin': '#26408b',
    'Gentoo penguin': '#a6cfd5'
}

# Styling shared by both axes, defined once to keep them consistent.
axis_title_font = dict(weight='bold', size=12)
axis_frame = dict(
    showline=True,
    linecolor='black',
    ticks='outside'
)

fig = px.bar(
    data_frame=df_group_species.to_pandas(),
    x='Species_Abb',
    y='Count',
    color='Species_Abb',
    width=700,
    color_discrete_map=species_colors,
    text='Count'  # print each bar's count as its label
)

fig.update_layout(
    title=dict(
        text='Distribución de pingüinos por especie',
        font=dict(
            size=18,
            color='black',
            weight='bold',
        ),
        # Centre the title horizontally near the top of the figure.
        x=0.5, y=0.95,
        xanchor='center'
    ),
    # The x axis already names each species, so the legend is redundant.
    showlegend=False,
    paper_bgcolor='white',
    plot_bgcolor='white',
    xaxis=dict(
        title=dict(
            text='Especies de pingüinos',
            font=axis_title_font
        ),
        **axis_frame
    ),
    yaxis=dict(
        title=dict(
            text='Cantidad de pingüinos',
            font=axis_title_font
        ),
        **axis_frame
    ),
    bargap=0.4
)

# Place the count labels above the bars in bold black text.
fig.update_traces(
    textposition='outside',
    textfont=dict(
        color='black',
        weight='bold'
    )
)

fig.show()