In [1]:
import altair as alt
import pandas as pd

from vega_datasets import data

In [3]:
# ZIP-code reference table from the vega_datasets sample collection; each row
# carries zip_code, latitude, longitude, city, state and county.
# zip_code is parsed as a number here, so leading zeros are lost
# (e.g. 00501 shows up as 501) until it is re-padded after the merge.
source = data.zipcodes.url
zip_loc = pd.read_csv(filepath_or_buffer=source)
zip_loc.head(n=5)

Unnamed: 0,zip_code,latitude,longitude,city,state,county
0,501,40.922326,-72.637078,Holtsville,NY,Suffolk
1,544,40.922326,-72.637078,Holtsville,NY,Suffolk
2,601,18.165273,-66.722583,Adjuntas,PR,Adjuntas
3,602,18.393103,-67.180953,Aguada,PR,Aguada
4,603,18.455913,-67.14578,Aguadilla,PR,Aguadilla


In [4]:
# Export pulled from Superset. Later cells rely on its 'zip' column (join key)
# and its 'diag_vs_all_diag' column (the value that gets plotted).
psyc_diag = pd.read_csv(filepath_or_buffer='diag_v_non_diag_public.csv')

In [5]:
# Attach coordinates to each diagnosis row. The two frames name the ZIP column
# differently ('zip_code' vs 'zip'); the inner join keeps only ZIP codes that
# are present in both the reference table and the Superset export.
psyc_diag_loc = (
    zip_loc
    .merge(psyc_diag, how='inner', left_on='zip_code', right_on='zip')
    # Re-pad ZIP codes to five characters as text. assign() replaces the column
    # outright; writing strings through .loc[:, col] into the existing numeric
    # column is an incompatible-dtype in-place set, which pandas 2.x warns
    # about and which is slated to become an error.
    .assign(zip_code=lambda df: df['zip_code'].astype(str).str.zfill(5))
    # Rename for clarity: share of admissions carrying a psychiatric diagnosis.
    # NOTE(review): meaning inferred from the column names - confirm with the
    # Superset query.
    .rename(columns={'diag_vs_all_diag': 'psyc_diagnosis/all_admission'})
)


In [6]:
# Shared canvas size so the backdrop and the point layer line up exactly.
MAP_WIDTH = 1050
MAP_HEIGHT = 800

# US state outlines, drawn as a neutral backdrop.
states = alt.topo_feature(data.us_10m.url, feature='states')

background = alt.Chart(states).mark_geoshape(
    fill='lightgray',
    stroke='white'
).project('albersUsa').properties(
    width=MAP_WIDTH,
    height=MAP_HEIGHT
)

# One circle per row of psyc_diag_loc, placed by latitude/longitude and
# colored by the 'psyc_diagnosis/all_admission' value.
# The former "County leading digit" calculated field was dropped: no encoding
# referenced it, and it actually held the first digit of the ZIP code.
psyc_zips = alt.Chart(psyc_diag_loc).mark_circle(size=600).encode(
    longitude='longitude:Q',
    latitude='latitude:Q',
    color='psyc_diagnosis/all_admission:Q',
    tooltip=['zip_code:N', 'psyc_diagnosis/all_admission:Q']
).project(
    type='albersUsa'
).properties(
    width=MAP_WIDTH,
    height=MAP_HEIGHT
)

# Layer the points over the state map; as the cell's last expression this
# renders the combined chart.
background + psyc_zips