In [117]:
import pandas as pd
import numpy as np
import altair as alt
import streamlit as st

In [118]:
# Load the three source tables. The merges in this notebook join them on a
# shared 'Location' column.
df1 = pd.read_csv('insurance.csv')
df2 = pd.read_csv('income.csv')
df3 = pd.read_csv('healthy.csv')

# Build the Disabled=True frame: join df1 with df2 on Location, keep the
# percentage and dollar estimates, rename them, and tag every row.
# Written as a single chain so each step returns a fresh frame; the earlier
# "subset columns, then df['Disabled'] = True" form assigns into a column
# subset of the merge result, which can raise SettingWithCopyWarning.
# NOTE(review): presumably 'Estimate (%)' comes from insurance.csv and
# 'Estimate ($)' from income.csv -- confirm against the files.
df = (
    pd.merge(df1, df2, on='Location')
    .loc[:, ['Location', 'Estimate (%)', 'Estimate ($)']]
    .rename(columns={'Estimate (%)': 'Insurance_rate', 'Estimate ($)': 'Income'})
    .assign(Disabled=True)
)
df

Unnamed: 0,Location,Insurance_rate,Income,Disabled
0,ALABAMA,14.5,40400,True
1,ALASKA,11.1,46800,True
2,ARIZONA,11.0,39200,True
3,ARKANSAS,7.4,35400,True
4,CALIFORNIA,7.6,48500,True
5,COLORADO,8.1,47500,True
6,CONNECTICUT,5.0,50500,True
7,DELAWARE,7.0,50500,True
8,DISTRICT OF COLUMBIA,2.9,60600,True
9,FLORIDA,15.1,38400,True


In [119]:
# Build the Disabled=False frame: join df2 with df3 on Location, keep the
# percentage and dollar estimates, rename them to match `df`, and tag every row.
# Chained so each step returns a fresh frame; assigning `dff['Disabled']` after
# a column subset can raise SettingWithCopyWarning.
# NOTE(review): presumably 'Estimate (%)' here comes from healthy.csv (df3)
# and 'Estimate ($)' from income.csv (df2) -- confirm against the files.
dff = (
    pd.merge(df2, df3, on='Location')
    .loc[:, ['Location', 'Estimate (%)', 'Estimate ($)']]
    .rename(columns={'Estimate (%)': 'Insurance_rate', 'Estimate ($)': 'Income'})
    .assign(Disabled=False)
)
dff

Unnamed: 0,Location,Insurance_rate,Income,Disabled
0,ALABAMA,14.6,40400,False
1,ALASKA,15.2,46800,False
2,ARIZONA,15.6,39200,False
3,ARKANSAS,14.0,35400,False
4,CALIFORNIA,11.2,48500,False
5,COLORADO,10.6,47500,False
6,CONNECTICUT,8.6,50500,False
7,DELAWARE,9.5,50500,False
8,DISTRICT OF COLUMBIA,4.2,60600,False
9,FLORIDA,19.7,38400,False


In [120]:
# Stack the Disabled=True rows and the Disabled=False rows into one long frame
# that the charts filter on via the 'Disabled' column.
# - `df[df['Disabled']]` keeps only the Disabled=True rows, so re-running this
#   cell does not append `dff` a second time (on the first run every row of
#   `df` is True, so nothing is dropped).
# - `ignore_index=True` gives the result unique row labels instead of
#   repeating each input frame's own index.
df = pd.concat([df[df['Disabled']], dff], ignore_index=True)
df

Unnamed: 0,Location,Insurance_rate,Income,Disabled
0,ALABAMA,14.5,40400,True
1,ALASKA,11.1,46800,True
2,ARIZONA,11.0,39200,True
3,ARKANSAS,7.4,35400,True
4,CALIFORNIA,7.6,48500,True
...,...,...,...,...
46,VIRGINIA,10.9,46500,False
47,WASHINGTON,9.4,50500,False
48,WEST VIRGINIA,10.9,39400,False
49,WISCONSIN,8.2,43400,False


In [121]:
# Hover selection: picks the point nearest the mouse. It is projected onto
# Income and Insurance_rate so other charts can match on the same values, and
# `empty='none'` means nothing is highlighted until a point is hovered.
selection = alt.selection_single(empty = 'none', on = 'mouseover', nearest = True, fields = ['Income', 'Insurance_rate'])

# Radio-button selection on the 'Disabled' column; starts with Disabled=True.
# Used below as a filter so only the chosen subgroup is drawn.
subgroup = alt.selection_single(name = 'subgroup', fields = ['Disabled'], init = {'Disabled': True}, bind = alt.binding_radio(options = [True, False]))

# Hovered point is red, everything else steelblue.
color_condition = alt.condition(selection, alt.ColorValue('red'), alt.ColorValue('steelblue'))

# Scatter of Insurance_rate against Income for the selected subgroup.
# The x/y domains are pinned to the same ranges the marginal histogram/density
# charts use ([0, 65000] for Income, [0, 26] for Insurance_rate). Without an
# explicit domain the scatter gets an auto-computed "nice" domain, which need
# not equal the marginals' fixed one, so the stacked charts would not line up.
scatter = alt.Chart(df).transform_filter(subgroup).mark_circle(size = 100).encode(
    x= alt.X('Income', title = 'Income ($)', scale = alt.Scale(domain = [0, 65000])),
    y= alt.Y('Insurance_rate', title = 'Insurance Rate (%)', scale = alt.Scale(domain = [0, 26])),
    color = color_condition,
    tooltip = ['Location', 'Insurance_rate', 'Income']
).properties(
    width=600,
    height=400
).add_selection(
    selection, subgroup
)
scatter

In [122]:
# Marginal view of Income: a histogram, a density curve and a hover rule,
# all drawn over the same fixed Income range and at the same size.
income_domain = [0, 65000]
top_size = dict(width = 600, height = 50)

# top histogram: binned Income counts for the selected subgroup
top_hist = (
    alt.Chart(df)
    .transform_filter(subgroup)
    .mark_bar()
    .encode(
        x = alt.X(
            'Income:Q',
            bin = alt.Bin(maxbins = 10),
            axis = None,
            scale = alt.Scale(domain = income_domain),
        ),
        y = alt.Y('count()', axis = None),
    )
    .properties(**top_size)
    .add_selection(subgroup)
)

# density line: density estimate of Income over the same range, drawn in red
top_line = (
    alt.Chart(df)
    .transform_filter(subgroup)
    .transform_density('Income', as_ = ['Income', 'density'], extent = income_domain)
    .mark_line()
    .encode(
        x = alt.X('Income:Q', axis = None, scale = alt.Scale(domain = income_domain)),
        y = alt.Y('density:Q', axis = None),
        color = alt.value('red'),
    )
    .properties(**top_size)
)

# vertical rules at each Income value; only the rule matching the hover
# selection is visible (opacity 1), the rest are fully transparent
vlines = (
    alt.Chart(df)
    .transform_filter(subgroup)
    .mark_rule(color = 'red')
    .encode(
        x = alt.X('Income:Q', axis = None, scale = alt.Scale(domain = income_domain)),
        size = alt.value(4),
        opacity = alt.condition(selection, alt.value(1), alt.value(0)),
    )
    .properties(**top_size)
    .add_selection(selection)
)

# Layer the three; counts and densities get independent y scales.
top = (top_hist + top_line + vlines).resolve_scale(y = 'independent')
top

In [123]:
# Marginal view of Insurance_rate, laid out sideways: a histogram, a density
# curve and a hover rule over the same fixed range and at the same size.
rate_domain = [0, 26]
right_size = dict(width = 50, height = 400)

# right histogram: binned Insurance_rate counts for the selected subgroup
right_hist = (
    alt.Chart(df)
    .transform_filter(subgroup)
    .mark_bar()
    .encode(
        y = alt.Y(
            'Insurance_rate:Q',
            bin = alt.Bin(maxbins = 10),
            axis = None,
            scale = alt.Scale(domain = rate_domain),
        ),
        x = alt.X('count()', axis = None),
    )
    .properties(**right_size)
    .add_selection(subgroup)
)

# density line: density estimate of Insurance_rate over the same range,
# with the line mark oriented horizontally and drawn in red
right_line = (
    alt.Chart(df)
    .transform_filter(subgroup)
    .transform_density('Insurance_rate', as_ = ['Insurance_rate', 'density'], extent = rate_domain)
    .mark_line(orient = alt.Orientation('horizontal'))
    .encode(
        y = alt.Y('Insurance_rate:Q', axis = None, scale = alt.Scale(domain = rate_domain)),
        x = alt.X('density:Q', axis = None),
        color = alt.value('red'),
    )
    .properties(**right_size)
)

# horizontal rules at each Insurance_rate value; only the rule matching the
# hover selection is visible (opacity 1), the rest are fully transparent
hlines = (
    alt.Chart(df)
    .transform_filter(subgroup)
    .mark_rule(color = 'red')
    .encode(
        y = alt.Y('Insurance_rate:Q', axis = None, scale = alt.Scale(domain = rate_domain)),
        size = alt.value(4),
        opacity = alt.condition(selection, alt.value(1), alt.value(0)),
    )
    .properties(**right_size)
    .add_selection(selection)
)

# Layer the three; counts and densities get independent x scales.
right = (right_hist + right_line + hlines).resolve_scale(x = 'independent')
right

In [124]:
# Final layout: the Income marginal on top, then the scatter with the
# Insurance_rate marginal to its right (those two share a y scale).
# `.properties()` returns a new chart rather than modifying in place, so the
# titled chart is assigned back to `output`; otherwise any later use of
# `output` would render without the title.
output = (
    top & (scatter | right).resolve_scale(y = 'shared')
).properties(title = 'Insurance Rate vs. Income for Disabled People in the US')
output

In [125]:
# Hand the combined Altair chart to Streamlit for rendering.
# NOTE(review): when executed inside the notebook this call only returns a
# DeltaGenerator object (see the cell output); presumably the chart is meant
# to be shown when this code runs as a Streamlit app -- confirm.
st.altair_chart(output)

DeltaGenerator(_root_container=0, _provided_cursor=None, _parent=None, _block_type=None, _form_data=None)