In [29]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [30]:
import matplotlib.font_manager as fm
# Use the 'Malgun Gothic' font family for matplotlib text.
font_settings = {'family': 'Malgun Gothic'}
plt.rc('font', **font_settings)

In [31]:
# Load the penguin dataset used by every chart below.
df = pd.read_csv(
    filepath_or_buffer="06. 데이터 시각화 기초_penguins_lter_result.csv"
)

In [32]:
# Number of rows per species, most frequent first (value_counts skips missing values).
species_counts = df.loc[:, 'Species'].value_counts()

In [33]:
# 1.Bar Graph

In [34]:
# Bar chart of the number of penguins per species.
# Each bar is annotated with its count; 'auto' lets plotly decide whether the
# label is drawn inside or outside the bar.
fig = go.Figure(
    data = [
        go.Bar(
            x = species_counts.index,
            y = species_counts.values,
            text = species_counts.values,
            textposition = 'auto'
        )
    ]
)

fig.update_layout(
    title = 'Penguins species counts',
    xaxis_title = 'Species',
    yaxis_title = 'Count'
)

# Render explicitly, like the other chart cells, instead of relying on the
# cell's last expression being displayed.
fig.show()


In [35]:
# 2.Pie Graph

In [36]:
# Pie chart of each species' share of the dataset.
# Every slice is labelled with the species name and its percentage.
fig = go.Figure(
    data = [
        go.Pie(
            labels = species_counts.index,
            values = species_counts.values,
            textinfo = 'label+percent'
        )
    ]
)

fig.update_layout(
    title = 'Penguins species counts'
)

fig.show()

In [37]:
# 3.Heatmap Graph

In [38]:
# Keep only the numeric columns, then compute their pairwise correlations.
numeric_data = df.select_dtypes(include='number')
corr_matrix = numeric_data.corr()

# Display the correlation table as the cell output.
corr_matrix

Unnamed: 0,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g),Delta 15 N (o/oo),Delta 13 C (o/oo)
Culmen Length (mm),1.0,-0.228626,0.653096,0.589451,-0.056158,0.195186
Culmen Depth (mm),-0.228626,1.0,-0.577792,-0.472016,0.590519,0.418922
Flipper Length (mm),0.653096,-0.577792,1.0,0.872979,-0.49253,-0.363132
Body Mass (g),0.589451,-0.472016,0.872979,1.0,-0.535012,-0.367915
Delta 15 N (o/oo),-0.056158,0.590519,-0.49253,-0.535012,1.0,0.564296
Delta 13 C (o/oo),0.195186,0.418922,-0.363132,-0.367915,0.564296,1.0


In [39]:
# Heatmap of the correlation matrix between the numeric columns.
# Axis labels are taken from corr_matrix itself so they always line up with
# the rows and columns of `z`; each cell shows its value to two decimals.
fig = go.Figure(
    data = go.Heatmap(
        z = corr_matrix.values,
        x = corr_matrix.columns,
        y = corr_matrix.index,
        colorscale = 'Viridis',
        text = corr_matrix.values,
        texttemplate = '%{text:.2f}'
    )
)

fig.update_layout(
    # The chart shows correlations, not species counts.
    title = 'Correlation between penguin numeric features',
    width = 700,
    height = 700
)

fig.show()

In [40]:
# 4.Scatter Graph

In [41]:
# Scatter plot of flipper length (x) against body mass (y), one marker per row.
# Species is converted to integer category codes to drive the colorscale, and
# marker size is the body mass divided by 100.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df['Flipper Length (mm)'],
        y=df['Body Mass (g)'],
        mode='markers',
        marker={
            'color': df['Species'].astype('category').cat.codes,
            'colorscale': 'Viridis',
            'size': df['Body Mass (g)'] / 100,
        },
    )
)

fig.update_layout(
    title='Correlation between Penguin Flipper Length and Weight',
    xaxis_title='Flipper Length (mm)',
    yaxis_title='Weight (g)',
)

fig.show()

In [42]:
# 5.Line Graph

In [43]:
# Parse the egg dates; values that cannot be parsed become NaT.
# NOTE(review): pandas warns that it could not infer the date format and falls
# back to element-wise parsing. Pass an explicit `format=` once the date layout
# in the CSV has been confirmed.
df['Date Egg'] = pd.to_datetime(df['Date Egg'], errors='coerce')

# Count rows per (date, species) and pivot species into columns.
# fill_value=0 keeps the counts as integers instead of going through NaN/float.
date_species_penguin = (
    df.groupby(['Date Egg', 'Species'])
    .size()
    .unstack(fill_value=0)
)

# Every calendar day between the first and last recorded egg date.
full_date_range = pd.date_range(start=df['Date Egg'].min(), end=df['Date Egg'].max())

# Insert the days that have no records, with a count of 0 for every species.
date_species_penguin = date_species_penguin.reindex(full_date_range, fill_value=0)


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.



In [44]:
# Line chart of the daily record count per species.
# One trace is drawn per entry below:
# (column in date_species_penguin, legend label, line/marker color).
species_traces = [
    ('Adelie Penguin (Pygoscelis adeliae)', 'Adelie Penguin', 'blue'),
    ('Gentoo penguin (Pygoscelis papua)', 'Gentoo Penguin', 'green'),
    ('Chinstrap penguin (Pygoscelis antarctica)', 'Chinstrap Penguin', 'red'),
]

fig = go.Figure()
for column, label, color in species_traces:
    daily_counts = date_species_penguin[column]
    fig.add_trace(
        go.Scatter(
            x=date_species_penguin.index,
            y=daily_counts,
            mode='lines+markers+text',
            name=label,
            # Show each day's count as a text label above its marker.
            text=daily_counts,
            textposition='top center',
            marker=dict(size=8, color=color),
            line=dict(color=color)
        )
    )

fig.update_layout(
    title="Number of Penguins Born by Date",
    xaxis_title="Date",
    yaxis_title="Hatch Count",
    legend_title="Species",
    width=900,
    height=700
)