## Data Visualization

In [107]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [108]:
# Read the processed Titanic data produced by the previous notebook and discard
# the 'Unnamed: 0' column (presumably the old row index saved alongside the data).
df = pd.read_csv('Titanic_Processed.csv').drop(columns=['Unnamed: 0'])

# Preview the first few rows
df.head()

Unnamed: 0,pclass,survived,sex,age,Siblings and Spouses Aboard,Parents and Children Aboard,fare,embarked,cabin_letter,is_male,Total Family Aboard
0,1.0,1.0,female,29.0,0.0,0.0,211.3375,Southampton,B,0,0.0
1,1.0,1.0,male,0.9167,1.0,2.0,151.55,Southampton,C,1,3.0
2,1.0,0.0,female,2.0,1.0,2.0,151.55,Southampton,C,0,3.0
3,1.0,0.0,male,30.0,1.0,2.0,151.55,Southampton,C,1,3.0
4,1.0,0.0,female,25.0,1.0,2.0,151.55,Southampton,C,0,3.0


### Theming

Plotly has several themes built in, such as 'plotly_dark', or you can create and customize your own and use it as a default.

In [109]:
# Session-wide Plotly Express defaults; every chart created below picks these up
px.defaults.template = 'plotly_dark'                                 # built-in dark theme
px.defaults.color_discrete_sequence = px.colors.qualitative.Prism    # palette for discrete colors
px.defaults.color_continuous_scale = px.colors.sequential.Agsunset   # scale for continuous colors

### Histograms

Histograms group values into bins and show how many observations fall into each one, which reveals the shape of a distribution. You can also segment the bars by color.

In [110]:
# Distribution of passenger ages, split by the `survived` column
px.histogram(
    data_frame=df,
    x='age',
    color='survived',
    title='Survivorship by Age',
)

In [111]:
# Same histogram, but with the number of bins requested explicitly via `nbins`
px.histogram(
    data_frame=df,
    x='age',
    nbins=10,
    color='survived',
    title='Survivorship by Age',
)

### Box Plots
Anything you can plot in a histogram can also be summarized with a box plot, which shows the distribution's median, quartiles, and outliers.

In [112]:
# One horizontal box per value of `sex`, summarizing the age distribution
px.box(
    data_frame=df,
    x='age',
    y='sex',
    color='sex',
    title='Age by Gender',
)

In [113]:
# `points='all'` draws every individual observation next to its box
px.box(
    data_frame=df,
    x='age',
    y='sex',
    color='sex',
    points='all',
    title='Age by Gender',
)

In [114]:
# A violin plot presents the same age-by-sex data as the box plots above
px.violin(
    data_frame=df,
    x='age',
    y='sex',
    color='sex',
    title='Age by Gender',
)

In [115]:
# `box=True` overlays a box plot inside each violin
px.violin(
    data_frame=df,
    x='age',
    y='sex',
    color='sex',
    box=True,
    title='Age by Gender',
)

In [116]:
# Color does not have to repeat an axis: here it encodes `pclass`, a third
# dimension, and `labels` gives that column a friendlier display name.
px.violin(
    data_frame=df,
    x='age',
    y='sex',
    color='pclass',
    labels={'pclass': 'Passenger Class'},
    title='Age by Gender and Passenger Class',
)

### Scatter Plots
Scatter plots are fantastic for identifying potential correlations between variables.

In [117]:
# Siblings/spouses vs. parents/children; marker size follows the total family
# count and marker color follows the `survived` column.
px.scatter(
    data_frame=df,
    x='Siblings and Spouses Aboard',
    y='Parents and Children Aboard',
    size='Total Family Aboard',
    color='survived',
    title='Relationship between Family Member Count and Survivorship',
)

In [118]:
# 3D variant of the family scatter plot: age is placed on the z axis and is
# also used for marker color; marker size follows the total family count.
# The title names age because this figure does not encode `survived` at all.
fig = px.scatter_3d(
    data_frame=df,
    x='Siblings and Spouses Aboard',
    y='Parents and Children Aboard',
    z='age',
    size='Total Family Aboard',
    color='age',
    title='Relationship between Family Member Count and Age',
)

# Fix the canvas size explicitly for the 3D figure
fig.update_layout(width=800, height=600)
fig.show()

### Line Chart
Line Charts are common and important. Let's use them to look at grouped data.

In [119]:
# Create a simple aggregate dataset: the mean of every numeric column for each
# distinct passenger age.
# `numeric_only=True` is required because `df` also holds text columns
# (sex, embarked, cabin_letter); recent pandas versions raise a TypeError
# instead of silently dropping them when asked for their mean.
df_ages = df.groupby('age').mean(numeric_only=True)
df_ages.head(3)

Unnamed: 0_level_0,pclass,survived,Siblings and Spouses Aboard,Parents and Children Aboard,fare,is_male,Total Family Aboard
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.1667,3.0,1.0,1.0,2.0,20.575,0.0,3.0
0.3333,3.0,0.0,0.0,2.0,14.4,1.0,2.0
0.4167,3.0,1.0,0.0,1.0,8.5167,1.0,1.0


In [120]:
# Mean of `survived` for each age; the x values come from the frame's index
px.line(
    data_frame=df_ages,
    x=df_ages.index,
    y='survived',
    labels={'survived': '% Survived'},
    title='Survivorship by Age',
)

In [121]:
# The same series as the line chart, drawn as an area chart instead
px.area(
    data_frame=df_ages,
    x=df_ages.index,
    y='survived',
    labels={'survived': '% Survived'},
    title='Survivorship by Age',
)

### Bar Charts

Bar charts are fantastic for comparing a quantity across a small number of discrete categories.

In [122]:
# Mean of every numeric column for each passenger class.
# `numeric_only=True` keeps the text columns (sex, embarked, cabin_letter) out
# of the aggregation; recent pandas versions raise a TypeError otherwise.
df_pclass = df.groupby('pclass').mean(numeric_only=True)
df_pclass.head()

Unnamed: 0_level_0,survived,age,Siblings and Spouses Aboard,Parents and Children Aboard,fare,is_male,Total Family Aboard
pclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1.0,0.616822,39.083038,0.439252,0.367601,87.555777,0.557632,0.806854
2.0,0.429603,29.506705,0.393502,0.368231,21.179196,0.617329,0.761733
3.0,0.255289,24.816367,0.568406,0.400564,13.302889,0.695346,0.96897


In [123]:
# One bar per passenger class showing the mean of `survived`; the class index
# is used both for the x position and for the bar color.
fig = px.bar(
    data_frame=df_pclass,
    x=df_pclass.index,
    y='survived',
    color=df_pclass.index,
    labels={'survived': '% Survived', 'pclass': 'Passenger Class'},
    title='Survivorship by Passenger Class',
)

# The color scale legend is distracting here, so hide it
fig.update_coloraxes(showscale=False)

fig.show()

### Tree Maps

Because why not?

Seriously, these things are great for hierarchical data.

In [124]:
# Add a readable class label derived from the numeric `pclass` column
df['Passenger Class'] = df['pclass'].map({
    1: '1st Class',
    2: '2nd Class',
    3: '3rd Class',
})

# Hierarchy: all passengers -> port of embarkation -> class -> sex,
# with each tile colored by the `survived` column
fig = px.treemap(
    data_frame=df,
    path=[px.Constant('All Passengers'), 'embarked', 'Passenger Class', 'sex'],
    color='survived',
    labels={'survived': '% Surviving'},
)

fig.update_layout(height=600)
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [125]:
# The treemap's hierarchy, drawn as a sunburst instead.
# Relies on the 'Passenger Class' column already being present in `df`.
fig = px.sunburst(
    data_frame=df,
    path=[px.Constant('All Passengers'), 'embarked', 'Passenger Class', 'sex'],
    color='survived',
    labels={'survived': '% Surviving'},
)

fig.update_layout(height=600)
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

