# Bivariate visualizations

In [3]:
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd

## Load the data

In [17]:
# Read the penguins CSV (path is relative to this notebook) into a DataFrame
penguins = pd.read_csv(filepath_or_buffer="../data/penguins.csv")

In [18]:
# Display the dtype pandas inferred for each column of the loaded frame
penguins.dtypes

Unnamed: 0               int64
studyName               object
Sample Number            int64
Species                 object
Region                  object
Island                  object
Stage                   object
Individual ID           object
Clutch Completion       object
Date Egg                object
Culmen Length (mm)     float64
Culmen Depth (mm)      float64
Flipper Length (mm)    float64
Body Mass (g)          float64
Sex                     object
Delta 15 N (o/oo)      float64
Delta 13 C (o/oo)      float64
Comments                object
dtype: object

## Scatter Plot

In [19]:
# Map each species name to a fixed RGB color so the traces are colored
# consistently every time the figure is rebuilt
color_map = {
    'Adelie': 'rgb(235,52,52)',
    'Gentoo': 'rgb(235,149,52)',
    'Chinstrap': 'rgb(67,52,235)',
}

# Scatter culmen depth against culmen length, one color per species
fig = px.scatter(
    data_frame=penguins,
    title="Penguin Culmen Statistics",
    x="Culmen Length (mm)",
    y="Culmen Depth (mm)",
    color="Species",
    color_discrete_map=color_map,
)

# Render the figure inline
fig.show()

# Export the figure as a standalone HTML page. The encoding is set explicitly
# because open() otherwise uses the platform default (e.g. cp1252 on Windows),
# which can fail or garble output if the generated HTML contains non-ASCII text.
with open("../docs/plots/scatter/scatter_plot_1.html", "w", encoding="utf-8") as f:
    f.write(fig.to_html())

## Line Plot

*TODO: this section is still empty — no line plot has been added yet.*

## Correlation Plot

In [21]:
# Measurement columns to feed into the correlation analysis
use_cols = [
    'Culmen Length (mm)',
    'Culmen Depth (mm)',
    'Flipper Length (mm)',
    'Body Mass (g)',
]

In [22]:
# Pairwise Pearson correlation between the selected measurement columns
penguin_corr = penguins.loc[:, use_cols].corr(method="pearson")

In [23]:
# Display the correlation table computed from the selected columns
penguin_corr

Unnamed: 0,Culmen Length (mm),Culmen Depth (mm),Flipper Length (mm),Body Mass (g)
Culmen Length (mm),1.0,-0.235053,0.656181,0.59511
Culmen Depth (mm),-0.235053,1.0,-0.583851,-0.471916
Flipper Length (mm),0.656181,-0.583851,1.0,0.871202
Body Mass (g),0.59511,-0.471916,0.871202,1.0


In [25]:
# Draw the correlation table as a heatmap
fig = go.Figure(
    go.Heatmap(
        # Each inner list of z is one row of the table, so rows are labelled
        # from the frame's index and columns from its column labels
        z=penguin_corr.values.tolist(),
        x=penguin_corr.columns,
        y=penguin_corr.index,
        # Set the color scale
        colorscale='rdylgn',
        # Pin the color range to [-1, 1], the span of a correlation coefficient
        zmin=-1,
        zmax=1,
    )
)

# Show the plot
fig.show()

# Export the figure as a standalone HTML page. The encoding is set explicitly
# because open() otherwise uses the platform default (e.g. cp1252 on Windows),
# which can fail or garble output if the generated HTML contains non-ASCII text.
with open("../docs/plots/correlation/correlation_plot_1.html", "w", encoding="utf-8") as f:
    f.write(fig.to_html())