# Implementation
This notebook reads the cleaned data produced by `data_cleaning.ipynb` and builds all of the visualizations shown in the dashboard.

In [1]:
import pandas as pd
import numpy as np
import altair as alt
import geopandas as gpd
from altair import datum

In [2]:
# Load the cleaned dataset; the path is relative to the current working directory.
cleaned_data_path = 'final_data'
df = pd.read_csv(cleaned_data_path)

In [3]:
# Label each record in a 'treatment' column:
#   - 'Treatment Need Met' when hltsrv is one of the service combinations below
#   - 'Unmet Need' when unmet_need == 'Yes'
# The 'Unmet Need' rule is applied second, so it wins when both rules match.
services_received = [
    'Prescriptions',
    'Outpatient and Prescriptions',
    'Outpatient',
    'Inpatient, Outpatient and Prescriptions',
    'Inpatient',
    'Inpatient and Prescriptions',
    'Inpatient and Outpatient',
]
received_service = df['hltsrv'].isin(services_received)
reported_unmet_need = df['unmet_need'] == 'Yes'

df.loc[received_service, 'treatment'] = 'Treatment Need Met'
df.loc[reported_unmet_need, 'treatment'] = 'Unmet Need'

# df2 holds only the records that ended up with a treatment label
df2 = df.dropna(subset=['treatment'])

In [4]:
# Turn off Altair's max-rows check so charts can be built directly from the
# full DataFrames (presumably they exceed Altair's default row limit — confirm).
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Remove records with 'skipped' or unneeded data

In [5]:
# Keep only the records whose amt_paid is one of the recognised brackets;
# rows with a missing amt_paid or any other response are excluded.
# NOTE(review): '1,501 to $2,000' has no leading '$' — presumably this mirrors
# the label as it appears in the data; confirm before "fixing" it.
amt_paid_brackets = [
    'Less than $100',
    '$100 to $200',
    '$201 to $500',
    '$501 to $900',
    '$901 to $1,500',
    '1,501 to $2,000',
    '$2,001 to $5,000',
    'More than $5,000',
]
with_amt_paid = df.dropna(subset=['amt_paid'])
df3 = with_amt_paid[with_amt_paid['amt_paid'].isin(amt_paid_brackets)]

In [1]:
# Explicit category orderings passed to the charts' `sort=` arguments so the
# brackets appear in a logical order.

# Family-income brackets, lowest to highest.
sort1 = [
    'Less than $10,000 (including loss)',
    '$10,000 to $19,999',
    '$20,000 to $29,999',
    '$30,000 to $39,999',
    '$40,000 to $49,999',
    '$50,000 to $74,999',
    '$75,000 or more',
]

# Amount-paid brackets. Written lowest to highest here, then reversed so that
# sort2 runs from the highest bracket down to the lowest.
amt_paid_ascending = [
    'Less than $100',
    '$100 to $200',
    '$201 to $500',
    '$501 to $900',
    '$901 to $1,500',
    '1,501 to $2,000',
    '$2,001 to $5,000',
    'More than $5,000',
]
sort2 = amt_paid_ascending[::-1]

In [7]:
# Distinct insurance-coverage values present in the filtered records; these
# become the options of the dashboard's insurance drop-down.
insurers = pd.unique(df3['hltin'])
insurers

array(['Group Offered Health Insurance', 'None',
       'Private Health Insurance', 'Medicaid', 'Medicare',
       'Medicaid and Group Offered Health Insurance', 'Military',
       'Medicare and Group Offered Health Insurance',
       'Medicare and Medicaid',
       'Military and Group Offered Health Insurance',
       'Medicaid and Private Health Insurance', 'Medicare and Military',
       'Medicare, Medicaid, and Group Offered Health Insurance',
       'Medicare and Private Health Insurance',
       'Medicare, Medicaid, Military, and Group Offered Health Insurance',
       'Military and Private Health Insurance',
       'Medicaid, Military, and Group Offered Health Insurance',
       'Medicaid and Military',
       'Medicare, Medicaid, and Private Health Insurance',
       'Medicare, Military, and Private Health Insurance'], dtype=object)

### Reasons for no care - additional data preparation

In [13]:
# Count how often each no-care reason occurs, then keep the entries at
# positions 3 through 14 of that frequency table as a two-column frame.
reason_counts = df['no_care_reasons'].value_counts()
no_care = reason_counts.iloc[3:15].to_frame().reset_index()

In [14]:
# Collect the reason labels, i.e. the first column produced by reset_index().
# The column is selected by position rather than by the name 'index' because
# pandas >= 2.0 names it after the source Series ('no_care_reasons'), which
# made no_care['index'] raise a KeyError; positional access works on both.
no_care_list = no_care.iloc[:, 0].to_list()

In [15]:
# Restrict the treatment-labelled records to those whose no-care reason is in
# the selected list.
has_selected_reason = df2['no_care_reasons'].isin(no_care_list)
df4 = df2.loc[has_selected_reason]

## Dashboard

In [37]:
# Shared interactions used by the charts below.
# NOTE(review): binding_select / selection_single / selection_multi look like
# the Altair 4 selection API — confirm the Altair version this notebook targets.

# Drop-down menu listing every insurance type found in `insurers`
dropdown = alt.binding_select(options = insurers, name = 'Insurance Type')

# Single selection on the 'hltin' field, driven by the drop-down above
select = alt.selection_single(fields = ['hltin'], bind = dropdown,  name = 'Insurance Type' )

# Data for the first chart built below (the yearly utilization bars)
source = df3

# Multi-selection keyed on the x encoding; it is attached to the yearly bar
# chart and used as a filter by the other charts
click = alt.selection_multi(encodings = ['x'] )
# Multi-selection on individual marks; drives the highlight colour of the
# yearly bar chart
multi = alt.selection_multi()

# Utilization per year as a bar chart. Both the `click` and `multi` selections
# are attached here; `multi` decides which bars keep their colour.
time_bar = (
    alt.Chart(source)
    .mark_bar()
    .encode(
        x=alt.X('year:O', axis=alt.Axis(grid=True)),
        y=alt.Y('count(hltsrv):Q', title="Count of Service Utilization"),
        tooltip=['year', 'count(hltsrv)'],
        # selected bars are coloured by their record count, the rest turn grey
        color=alt.condition(multi, 'count(year)', alt.value('lightgray')),
    )
    .add_selection(click)
    .add_selection(multi)
    .properties(
        title='Mental Health Utilization over Time',
        height=200,
        width=500,
    )
)

# Treatment needs: normalized stacked bars per sex, one row per race,
# filtered by the year(s) picked through `click`.
source = df2
bar = (
    alt.Chart(source)
    # give every record an equal weight of 1/total so the weights can be summed
    .transform_joinaggregate(total='count(*)')
    .transform_calculate(percent='1/datum.total')
    .mark_bar(size=10)
    .encode(
        x=alt.X('sum(percent):Q', title='Percent', stack='normalize'),
        y=alt.Y('sex:N', title=''),
        row=alt.Row(
            'race:N',
            title='',
            header=alt.Header(
                labelAlign='left',
                labelAngle=0,
                labelFontWeight='bold',
            ),
        ),
        color=alt.Color(
            'treatment:N',
            scale=alt.Scale(
                domain=('Treatment Need Met', 'Unmet Need'),
                range=["#043694", "#a1d6ed"],
            ),
            legend=alt.Legend(title='Treatment Needs'),
        ),
    )
    .properties(width=300, height=30, title='Treatment Needs')
    .transform_filter(click)
)

# Bubble chart: amount paid against family income. Bubble size and colour both
# encode the record count; filtered by the insurance drop-down and by `click`.
source = df3
circle = (
    alt.Chart(source)
    .mark_circle()
    .encode(
        x=alt.X(
            'fam_income',
            title="Family Income",
            sort=sort1,
            axis=alt.Axis(labelAngle=-45, grid=True),
        ),
        y=alt.Y(
            'amt_paid',
            title='Amount Paid',
            sort=sort2,
            axis=alt.Axis(grid=True),
        ),
        size=alt.Size('count()'),
        color=alt.Color('count()'),
        tooltip='count()',
    )
    .properties(
        height=300,
        width=300,
        title='Amount Paid Out of Pocket for Mental Health Care by Income Level',
    )
    .add_selection(select)
    .transform_filter(select)
    .transform_filter(click)
)

# Service utilization per income bracket, coloured by type of service;
# filtered by the insurance drop-down and by `click`.
service_palette = ['#82C0CC', '#EDE7E3', '#FFA62B', '#DCABDF', '#a1d6ed', '#043694']
bar2 = (
    alt.Chart(source)
    .mark_bar()
    .encode(
        x=alt.X(
            'fam_income',
            title='Family Income',
            sort=sort1,
            axis=alt.Axis(labelAngle=-45),
        ),
        y=alt.Y('count(hltsrv):Q'),
        color=alt.Color(
            'hltsrv:N',
            legend=alt.Legend(title='Type of Service'),
            scale=alt.Scale(range=service_palette),
        ),
        tooltip=['count(hltsrv):Q'],
    )
    .properties(
        title='Mental Health Service Utilization by Income Level',
        width=300,
    )
    .add_selection(select)
    .transform_filter(select)
    .transform_filter(click)
)

# Drop-down for the choropleth: one option per distinct value of df_all.Type,
# starting on 'Any Mental Illnesses'.
# NOTE(review): df_all is not created in any cell visible in this notebook —
# confirm where it is defined, otherwise this cell fails on a fresh kernel.
types = pd.unique(df_all['Type'])
input_dropdown = alt.binding_select(options=types, name='Type')
type_selector = alt.selection_single(
    fields=['Type'],
    bind=input_dropdown,
    init={'Type': 'Any Mental Illnesses'},
)

# Choropleth of cases, filtered by the Type drop-down and by `click`.
# NOTE(review): the name `map` shadows Python's builtin; it is kept because the
# dashboard layout further down refers to it.
map = (
    alt.Chart(df_all)
    .mark_geoshape(stroke='white')
    .encode(
        color=alt.Color('Cases:Q', legend=alt.Legend(title='Cases')),
        tooltip=['year:O', 'State:N', 'Cases:Q'],
    )
    .properties(
        projection={'type': 'albersUsa'},
        width=500,
        height=350,
        title='Geographical Distribution of US Mental Health Care Utilization',
    )
    .add_selection(type_selector)
    .transform_filter(type_selector)
    .transform_filter(click)
)

# Horizontal bars counting each no-care reason, longest bar first. The y-axis
# labels are hidden, so the reason text is exposed through the tooltip.
source = df4
nc = (
    alt.Chart(source)
    .mark_bar(color='#a8a9ad')
    .encode(
        x=alt.X('count():Q'),
        y=alt.Y(
            'no_care_reasons:N',
            sort='-x',
            title='Reasons for No Care',
            axis=alt.Axis(labels=False),
        ),
        tooltip=['no_care_reasons'],
    )
    .transform_filter(click)
    .properties(title='Reasons for No Care', width=200, height=350)
    # an otherwise unused single selection attached to this chart
    .add_selection(alt.selection_single())
)

# Assemble the dashboard. `&` stacks charts vertically and `|` places them side
# by side; `&` binds tighter than `|`, so the layout is two columns.
left_column = time_bar & (bar | nc) & map
right_column = circle & bar2
dashboard = (
    (left_column | right_column)
    .properties(title='Mental Health Services in the US')
    .configure_title(anchor='middle')
)


In [38]:
# Export the assembled dashboard to final_dashboard.html (path is relative to
# the current working directory).
dashboard.save('final_dashboard.html')