# IS537 Final Project Part 2
##### Group 12 - Andrew Mo, Qitian He, Yingjie Zhao, Zicheng Li

### 0. Load in Data

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt
from vega_datasets import data
# import ipywidgets

ModuleNotFoundError: No module named 'vega_datasets'

In [None]:
# Relative path to the NIS-ACM survey extract (resolved against the working directory).
data_path = 'National_Immunization_Survey_Adult_COVID_Module__NIS-ACM___Vaccination_Status_and_Intent_by_Demographics.csv'

# Load the survey and append a combined "<Time Period>, <Year>" label column.
raw_df = pd.read_csv(data_path).assign(
    **{
        'Time Interval': lambda frame: (
            frame['Time Period'] + ', ' + frame['Year'].astype(str)
        )
    }
)

### 1. Functions Definition

In [None]:
# Constants

# Columns handed to data_pivot: [row label, column label, cell value].
COL_SELECT = [
    'Group Category',
    'Indicator Category',
    'Estimate (%)',
]

# Filter applied by data_filter when the caller does not supply one.
DEFAULT_FILTER_DICT = {
    'Geography': 'National',
    'Group Name': 'Race/Ethnicity',
    'Indicator Name': 'Vaccination and intent 4 level grouping',
    'Time Period': 'October 9  –  October 15',
}

# The 50 states plus the District of Columbia, as spelled in the 'Geography'
# filters below. Kept as a list because data_filter treats list values as
# "match any of these".
STATE_LIST = [
    'Alaska', 'Alabama', 'Arkansas', 'Arizona', 'California', 'Colorado',
    'Connecticut', 'District of Columbia', 'Delaware', 'Florida', 'Georgia',
    'Hawaii', 'Iowa', 'Idaho', 'Illinois', 'Indiana', 'Kansas', 'Kentucky',
    'Louisiana', 'Massachusetts', 'Maryland', 'Maine', 'Michigan',
    'Minnesota', 'Missouri', 'Mississippi', 'Montana', 'North Carolina',
    'North Dakota', 'Nebraska', 'New Hampshire', 'New Jersey', 'New Mexico',
    'Nevada', 'New York', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
    'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas',
    'Utah', 'Virginia', 'Vermont', 'Washington', 'Wisconsin', 'West Virginia',
    'Wyoming',
]


# data filtering
def data_filter(_df, _filter_dict=None):
    """Return the rows of ``_df`` that satisfy every condition in ``_filter_dict``.

    Args:
        _df: DataFrame to filter.
        _filter_dict: Mapping of column name -> wanted value. A list, tuple,
            set or frozenset means "column value is any of these"; anything
            else is compared with ``==``. When omitted (or ``None``), the
            module-level ``DEFAULT_FILTER_DICT`` is used.

    Returns:
        The subset of ``_df`` whose rows match all conditions. An empty
        mapping returns every row.

    Raises:
        KeyError: If a key of ``_filter_dict`` is not a column of ``_df``.
    """
    # Resolve the default at call time so the shared constant is never bound
    # as a mutable default argument.
    if _filter_dict is None:
        _filter_dict = DEFAULT_FILTER_DICT

    # Start with "keep everything" and AND each condition in.
    _mask = np.ones(len(_df), dtype=bool)
    for key, wanted in _filter_dict.items():
        if isinstance(wanted, (list, tuple, set, frozenset)):
            _mask = _mask & _df[key].isin(wanted)
        else:
            _mask = _mask & (_df[key] == wanted)
    return _df[_mask]


# data pivoting
def data_pivot(_df, _col_select):
    """Reshape long-format rows into a wide table of the four intent levels.

    Args:
        _df: Long-format DataFrame containing the columns in ``_col_select``.
        _col_select: ``[row label column, column label column, value column]``.

    Returns:
        ``None`` when ``_df`` has no rows. Otherwise a DataFrame indexed by
        the row-label column, with one column per intent level (least to most
        vaccinated), sorted by 'Vaccinated (≥1 dose)' in descending order.
    """
    if not len(_df):
        return None

    # Fixed column order for the pivoted table.
    ordered_levels = [
        'Probably or Definitely Will Not Get Vaccinated',
        'Probably Will Get Vaccinated or Are Unsure',
        'Definitely Will Get Vaccinated',
        'Vaccinated (≥1 dose)',
    ]

    # Exact duplicate rows would make pivot() fail, so drop them first.
    deduped = _df[_col_select].drop_duplicates()
    wide = deduped.pivot(
        index=_col_select[0],
        columns=_col_select[1],
        values=_col_select[2],
    )
    wide = wide[ordered_levels]
    return wide.sort_values('Vaccinated (≥1 dose)', ascending=False)


# # Basic pivoted data ploting
# def pivot_plot(_df):
#     COLOR_CHOICE = ['crimson','tab:orange', 'lightseagreen','tab:blue']
#     _df.plot.barh(
#         stacked=True,
#         color=COLOR_CHOICE
#     )
#     plt.legend(bbox_to_anchor=(1.0, 1.0))
#     plt.show()

### 2. Dataset Preparation

In [None]:
# Vega dataset - states
states = alt.topo_feature(data.us_10m.url, 'states')

# One row per state: share of all adults with at least one dose in the
# chosen survey period.
filter_states = {
    'Geography': STATE_LIST,
    'Group Name': 'All adults 18+',
    'Indicator Name': 'Vaccination and intent 4 level grouping',
    'Indicator Category': 'Vaccinated (≥1 dose)',
    'Time Period': 'August 28 – September 30'
}

df_states = data_filter(raw_df, filter_states).copy().sort_values(by=['Geography']).reset_index(drop=True)

# Attach the numeric state id that the map lookup keys on. Match on the state
# name instead of row position: a positional assignment silently puts ids on
# the wrong states whenever the survey slice is missing a state or contains
# an extra row.
# NOTE(review): assumes the vega CSV exposes full state names in a 'state'
# column spelled like 'Geography' - confirm against the dataset.
state_ids = pd.read_csv(data.population_engineers_hurricanes.url)[['state', 'id']]
df_states = df_states[['Geography', 'Estimate (%)']].merge(
    state_ids,
    how='left',  # keep every survey row; an unmatched name shows up as a NaN id
    left_on='Geography',
    right_on='state',
)

geo_col = ['Geography', 'id', 'Estimate (%)']
df_states = df_states[geo_col]

# # >>> Heat map dataset
# df_states

In [None]:
# # Old Color Scheme
# COLOR_DOMAIN = ['Probably or Definitely Will Not Get Vaccinated',
#        'Probably Will Get Vaccinated or Are Unsure', 'Definitely Will Get Vaccinated', 'Vaccinated (≥1 dose)']
# COLOR_RANGE = ['crimson', 'salmon', 'gold', 'steelblue']

# Same states, indicator and survey period as the map data, but broken down
# by race/ethnicity group instead of "all adults".
filter_bar_race = {
    'Geography': STATE_LIST,
    'Group Name': 'Race/Ethnicity',
    'Indicator Name': 'Vaccination and intent 4 level grouping',
    'Indicator Category': 'Vaccinated (≥1 dose)',
    'Time Period': 'August 28 – September 30',
}

# Keep the state plus the pivot columns, and discard rows with no estimate.
df_bar = (
    data_filter(raw_df, filter_bar_race)
    .loc[:, ['Geography', *COL_SELECT]]
    .dropna(subset=['Estimate (%)'])
)

# # >>> Bar plot dataset
# df_bar

### 3. Altair Plot Definition

In [None]:
# Drop-down widget labelled "State" that offers every entry of STATE_LIST.
input_dropdown = alt.binding_select(options=STATE_LIST, name='State')
# Single-value selection on the 'Geography' field, driven by the drop-down.
selection = alt.selection_single(fields=['Geography'], bind=input_dropdown)

# Grey US Map background
background = alt.Chart(states).mark_geoshape(
    fill='lightgray',
    stroke='white'
).properties(
    width=550,
    height=500
).project('albersUsa')

# Heat map - Vaccination Rate Estimates
# Each df_states row is joined to the state outline with the same 'id'
# (stored in the 'geo' field), coloured by its estimate, and then filtered
# by the drop-down selection. Size and projection match the background so
# the two charts line up when layered.
heat_map_alt = alt.Chart(df_states).mark_geoshape().encode(
    shape='geo:G',
    color='Estimate (%):Q',
    tooltip=['Geography:N', 'Estimate (%):Q']
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(data=states, key='id'),
    as_='geo'
).properties(
    width=550,
    height=500
).project(
    type='albersUsa'
).add_selection(
    selection
).transform_filter(
    selection
)

# background + heat_map_alt

In [None]:
# Colour scale for the bar chart: the single indicator category that df_bar
# is filtered to, mapped to one fixed colour.
COLOR_DOMAIN = ['Vaccinated (≥1 dose)']
COLOR_RANGE = ['lightseagreen']

# Horizontal bars: mean estimate per race/ethnicity group, restricted to the
# state picked in the drop-down.
# NOTE(review): the colour encoding sorts on a field named 'order', but df_bar
# is built from ['Geography'] + COL_SELECT, which has no such column - confirm
# whether this sort is still needed.
# NOTE(review): the same selection is also registered on heat_map_alt - confirm
# that adding it to both charts is intended.
bar_plot_alt = alt.Chart(df_bar).mark_bar().encode(
    x='mean(Estimate (%)):Q',
    y='Group Category:N',
    # color='Indicator Category:N',
    color=alt.Color('Indicator Category:N', scale=alt.Scale(
        domain=COLOR_DOMAIN,
        range=COLOR_RANGE),
        sort=alt.EncodingSortField('order', order='descending')),
    tooltip=['Indicator Category:N', 'mean(Estimate (%)):Q']
).properties(
    width=300,
    height=500
).add_selection(
    selection
).transform_filter(
    selection
)

# bar_plot_alt

### 4. Dashboard

In [None]:
# Left panel: the coloured state layer drawn over the grey base map.
# Right panel: the race/ethnicity bar chart, placed side by side with the map.
dashboard = alt.hconcat(
    alt.layer(background, heat_map_alt),
    bar_plot_alt,
)
dashboard

### Write-up
One paragraph explaining how to use the dashboard you created, to help someone who is not an expert understand your dataset.

As shown above, this is the dashboard we built for the National Immunization Survey dataset. On the left is a geographical heat map of the United States in which each state is colored by its Vaccination Rate Estimate. The color legend beside the heat map shows that the higher the Vaccination Rate Estimate, the darker the color. On the right is a bar chart with the mean of Estimate (%) on the horizontal axis and the Race/Ethnicity group categories on the vertical axis. To use the dashboard, choose a state from the drop-down menu: the chosen state is highlighted on the map, and the chart on the right interactively displays that state's Vaccination Rate Estimates for the different race/ethnicity groups.

### Contextual dataset 1

 Title: Provisional COVID-19 Deaths by Sex and Age

 Link: https://data.cdc.gov/NCHS/Provisional-COVID-19-Deaths-by-Sex-and-Age/9bhg-hcku

This contextual dataset records COVID-19 deaths by sex, age, state, and date. In Part 1, our group decided to use the Vaccination Status and Intent by Demographics dataset. Both datasets are about COVID-19, but one contains immunization records and the other contains death reports. Hence, our group thinks the contextual dataset would be helpful for our project. For instance, we can identify the high-risk groups from the contextual dataset and examine how they relate to vaccination status.

### Contextual dataset 2

Title: COVID-19 Vaccine Delivery

Link: https://catalog.data.gov/dataset/covid-19-vaccine-delivery

This dataset provides good, accurate, and timely information on vaccine delivery; the amount of vaccine available may affect changes in vaccine uptake. We think this dataset can help verify some conclusions — for example, whether the number of people vaccinated in a certain period was low because there was not enough vaccine in that local area.