In [1]:
import pandas as pd
import numpy as np
import os
import glob
from datetime import date, datetime
import plotly.express as px
import ipywidgets as widgets
import IPython.display
from IPython.display import display, clear_output


In [2]:
# Load the CDC vaccination CSV; the path is relative to the notebook's working directory.
df1 = pd.read_csv('Data/covid19_vaccination_data_US_full.csv')

In [6]:
# Peek at the first five values of the 'Administered' column.
df1['Administered'].head()

0    449955588
1      8365950
2     11076817
3      1386520
4      1295820
Name: Administered, dtype: int64

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for 'Administered'.
df1['Administered'].describe()

count    2.216800e+04
mean     7.739341e+06
std      3.535235e+07
min      0.000000e+00
25%      3.116915e+05
50%      1.697588e+06
75%      5.372968e+06
max      4.499556e+08
Name: Administered, dtype: float64

## Data Collection and Preprocessing

* The data was collected from the official United States Centers for Disease Control and Prevention (CDC) website. It can be downloaded [here](https://data.cdc.gov/Vaccinations/COVID-19-Vaccinations-in-the-United-States-Jurisdi/unsk-b7fc/data). 

* For our analysis, we consider data from 13 December, 2020 till 20 November, 2021 across all states, territories and federal units in the United States. 

* For pre-processing, we removed data for dates outside the set interval. 

* For states which started the vaccination process late, their vaccination data was set to 0. 

* Apart from that, not much data cleaning was required since the data is well populated and does not have any anomalies like NaN or Null values.

In [14]:
# Interactive chart: vaccine doses administered per 100K population, with two
# user-selected locations highlighted against all the others drawn in grey.

df1 = pd.read_csv('Data/covid19_vaccination_data_US_full.csv')

# Exclude rows whose Location is "US" (presumably the national aggregate --
# TODO confirm) so that only individual jurisdictions are plotted.
df1 = df1[df1.Location != "US"]
province_list = np.unique(df1['Location'].values)

# Initial selections shown in the two dropdowns.
prov = province_list[2]
comp_prov = province_list[4]

dropdown_state = widgets.Dropdown(options = sorted(province_list), value=prov, description='State 1:')
dropdown_comp_state = widgets.Dropdown(options = sorted(province_list), value=comp_prov, description='State 2:')
input_widgets = widgets.VBox([dropdown_state, dropdown_comp_state])

# Cell-private aliases for this chart's widgets. A later cell in this notebook
# re-binds dropdown_state, dropdown_comp_state, input_widgets, prov and
# comp_prov for a second chart; the callbacks below go through these aliases so
# they keep redrawing *this* chart with *this* chart's selection.
_per_100k_state_dropdown = dropdown_state
_per_100k_comp_dropdown = dropdown_comp_state
_per_100k_widgets = input_widgets


def _build_per_100k_comparison(data, primary, secondary):
    """
    Build the per-100K line chart with up to two highlighted locations.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing the "Date", "Location" and "Admin_Per_100K" columns.
        It is copied, never modified.
    primary : str
        Location whose line is drawn in blue.
    secondary : str or None
        Location whose line is drawn in red; None highlights `primary` only.

    Returns
    -------
    tuple
        (ranked, figure): the sorted copy of `data` that was plotted, and the
        Plotly figure built from it.
    """
    ranked = data.copy()
    ranked["Date"] = pd.to_datetime(ranked["Date"])

    # Highlighted locations get the smallest "order" values so that, with the
    # descending sort below, their rows come last in the frame -- presumably so
    # their traces are drawn on top of the grey ones.
    if secondary is None:
        ranks, background_rank = {primary: 1}, 2
    else:
        ranks, background_rank = {primary: 1, secondary: 2}, 3
    ranked["order"] = ranked["Location"].map(ranks).fillna(background_rank)
    ranked = ranked.sort_values(by=["order", "Date"], ascending=False)

    # NOTE(review): "Admininstration" is misspelled; the text is kept as-is here.
    title = "Admininstration of Covid Vaccines per 100K Population Across Various Regions in the US"

    figure = px.line(ranked,
            x="Date",
            y="Admin_Per_100K",
            color="Location",
            labels={
                 "Date": "Date Administered",
                 "Admin_Per_100K": "Vaccine Doses Administered",
             },
            width=800, height=700,
            title=title)

    # Grey out every trace first, then recolour only the selected ones.
    figure.update_traces({"line":{"color":"lightgrey", "width":2}})

    figure.update_traces(patch={"line":{"color":"blue", "width":3}},
                         selector={"legendgroup":primary})

    if secondary is not None:
        figure.update_traces(patch={"line":{"color":"red", "width":3}},
                             selector={"legendgroup":secondary})

    figure.update_layout(title_text=title, title_x=0.5,
                         showlegend=True,
                         yaxis_range=[0,200000],
                         yaxis={"visible":True})

    return ranked, figure


def _redraw_per_100k(primary, secondary):
    """
    Re-display this chart's dropdowns followed by a freshly built figure.

    Parameters
    ----------
    primary : str
        Location to highlight in blue.
    secondary : str or None
        Location to highlight in red, or None for no comparison.
    """
    display(_per_100k_widgets)
    _, figure = _build_per_100k_comparison(df1, primary, secondary)
    figure.show()
    # wait=True defers the clear until new output is available, so the output
    # shown now is only replaced when the next redraw starts emitting.
    IPython.display.clear_output(wait=True)


sorted_df, fig = _build_per_100k_comparison(df1, prov, comp_prov)


def dropdown_state_eventhandler(change):
    """
    Eventhandler for the "State 1" dropdown widget.

    Stores the new selection in the module-level `prov` and redraws the chart,
    pairing it with the value currently shown in this chart's "State 2"
    dropdown.

    Parameters
    ----------
    change : dict-like
        Change notification passed by `observe`; only `change.new` is read.
    """
    global prov
    prov = change.new
    _redraw_per_100k(prov, _per_100k_comp_dropdown.value)


def dropdown_comp_state_eventhandler(change):
    """
    Eventhandler for the "State 2" (comparison) dropdown widget.

    Stores the new selection in the module-level `comp_prov` and redraws the
    chart, pairing it with the value currently shown in this chart's "State 1"
    dropdown.

    Parameters
    ----------
    change : dict-like
        Change notification passed by `observe`; only `change.new` is read.
    """
    global comp_prov
    comp_prov = change.new
    _redraw_per_100k(_per_100k_state_dropdown.value, comp_prov)


dropdown_state.observe(dropdown_state_eventhandler, names='value')
dropdown_comp_state.observe(dropdown_comp_state_eventhandler, names='value')

display(input_widgets)
fig.show()
# Leave a deferred clear pending so the first callback's output replaces this one.
IPython.display.clear_output(wait=True)




VBox(children=(Dropdown(description='State 1:', index=2, options=('AK', 'AL', 'AR', 'AS', 'AZ', 'BP2', 'CA', '…

## Plot Description

The plot compares vaccine doses administered per 100K population between two states. We can clearly see that the Republic of Palau (RP) (which falls in the U.S. Pacific Islands) has the highest number of total vaccine doses administered per 100K population, followed by Vermont and Puerto Rico. The Marshall Islands (MH) and the Federated States of Micronesia (FM) have the lowest numbers. 

In [16]:

# Interactive chart: total vaccine doses administered, with two user-selected
# locations highlighted against all the others drawn in grey.

# Exclude rows whose Location is "US" (presumably the national aggregate --
# TODO confirm) so that only individual jurisdictions are plotted.
df1 = df1[df1.Location != "US"]
province_list = np.unique(df1['Location'].values)

# Initial selections shown in the two dropdowns.
prov = province_list[2]
comp_prov = province_list[4]

dropdown_state = widgets.Dropdown(options = sorted(province_list), value=prov, description='State 1:')
dropdown_comp_state = widgets.Dropdown(options = sorted(province_list), value=comp_prov, description='State 2:')
input_widgets = widgets.VBox([dropdown_state, dropdown_comp_state])

# Cell-private aliases for this chart's widgets. The earlier per-100K cell's
# callbacks assign the same module-level prov / comp_prov names, so the
# callbacks below read the selection from this chart's own dropdowns instead
# of trusting those shared globals.
_total_doses_state_dropdown = dropdown_state
_total_doses_comp_dropdown = dropdown_comp_state
_total_doses_widgets = input_widgets


def _build_total_doses_comparison(data, primary, secondary):
    """
    Build the total-doses line chart with up to two highlighted locations.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing the "Date", "Location" and "Administered" columns.
        It is copied, never modified.
    primary : str
        Location whose line is drawn in blue.
    secondary : str or None
        Location whose line is drawn in red; None highlights `primary` only.

    Returns
    -------
    tuple
        (ranked, figure): the sorted copy of `data` that was plotted, and the
        Plotly figure built from it.
    """
    ranked = data.copy()
    ranked["Date"] = pd.to_datetime(ranked["Date"])

    # Highlighted locations get the smallest "order" values so that, with the
    # descending sort below, their rows come last in the frame -- presumably so
    # their traces are drawn on top of the grey ones.
    if secondary is None:
        ranks, background_rank = {primary: 1}, 2
    else:
        ranks, background_rank = {primary: 1, secondary: 2}, 3
    ranked["order"] = ranked["Location"].map(ranks).fillna(background_rank)
    ranked = ranked.sort_values(by=["order", "Date"], ascending=False)

    # NOTE(review): "Admininstration" is misspelled; the text is kept as-is here.
    title = "Total Admininstration of Covid Vaccines Across Various Regions in the US"

    figure = px.line(ranked,
            x="Date",
            y="Administered",
            color="Location",
            labels={
                 "Date": "Date Administered",
                 "Administered": "Vaccine Doses Administered",
             },
            width=800, height=700,
            title=title)

    # Grey out every trace first, then recolour only the selected ones.
    figure.update_traces({"line":{"color":"lightgrey", "width":2}})

    figure.update_traces(patch={"line":{"color":"blue", "width":3}},
                         selector={"legendgroup":primary})

    if secondary is not None:
        figure.update_traces(patch={"line":{"color":"red", "width":3}},
                             selector={"legendgroup":secondary})

    figure.update_layout(title_text=title, title_x=0.5,
                         showlegend=True,
                         yaxis_range=[0,58500000],
                         yaxis={"visible":True})

    return ranked, figure


def _redraw_total_doses(primary, secondary):
    """
    Re-display this chart's dropdowns followed by a freshly built figure.

    Parameters
    ----------
    primary : str
        Location to highlight in blue.
    secondary : str or None
        Location to highlight in red, or None for no comparison.
    """
    display(_total_doses_widgets)
    _, figure = _build_total_doses_comparison(df1, primary, secondary)
    figure.show()
    # wait=True defers the clear until new output is available, so the output
    # shown now is only replaced when the next redraw starts emitting.
    IPython.display.clear_output(wait=True)


sorted_df, fig = _build_total_doses_comparison(df1, prov, comp_prov)


def dropdown_state_eventhandler(change):
    """
    Eventhandler for the "State 1" dropdown widget.

    Stores the new selection in the module-level `prov` and redraws the chart,
    pairing it with the value currently shown in this chart's "State 2"
    dropdown.

    Parameters
    ----------
    change : dict-like
        Change notification passed by `observe`; only `change.new` is read.
    """
    global prov
    prov = change.new
    _redraw_total_doses(prov, _total_doses_comp_dropdown.value)


def dropdown_comp_state_eventhandler(change):
    """
    Eventhandler for the "State 2" (comparison) dropdown widget.

    Stores the new selection in the module-level `comp_prov` and redraws the
    chart, pairing it with the value currently shown in this chart's "State 1"
    dropdown.

    Parameters
    ----------
    change : dict-like
        Change notification passed by `observe`; only `change.new` is read.
    """
    global comp_prov
    comp_prov = change.new
    _redraw_total_doses(_total_doses_state_dropdown.value, comp_prov)


dropdown_state.observe(dropdown_state_eventhandler, names='value')
dropdown_comp_state.observe(dropdown_comp_state_eventhandler, names='value')

display(input_widgets)
fig.show()
# Leave a deferred clear pending so the first callback's output replaces this one.
IPython.display.clear_output(wait=True)


VBox(children=(Dropdown(description='State 1:', index=2, options=('AK', 'AL', 'AR', 'AS', 'AZ', 'BP2', 'CA', '…

## Plot Description

The plot compares the total vaccine administration trends of two states. We can clearly see that California has the highest number of total vaccine doses administered, followed by states like Texas, Florida and New York. However, a majority of the other states have low absolute numbers because of their smaller populations. 

## EDA -- Consumer Spending

* The stock prices of Visa, Mastercard, and American Express drop from around 20th February 2020. This was the time when news of Covid-19 began to spread rapidly around the world, and the first cases of Covid-19 were being reported in the US. Stock prices continued to fall, and these companies hit their lowest values around 23rd March 2020.
<br><br>
* In the subsequent months, due to the lockdown in the US, the consumer and corporate spending was largely very low. This is reflected in the stock prices of these companies which were recovering very slowly from the initial drop due to panic created by the onset of Covid-19.
<br><br>
* We see that the stocks began to rise as the lockdown eased in July 2020. This shows that people and businesses started spending more and there was an increase in economic activity with the easing of restrictions.
<br><br>
* When the first wave began around the end of October 2020, we saw a sudden fall in the stock prices of these companies as Covid cases began to rise. This was due to the fact that businesses were forced to shut down or slow down, and consumers were forced to stay at home. Through the first wave, i.e. from November 2020 to January 2021, the stock prices remained relatively stagnant with no growth, especially for Mastercard and Visa, as business and consumer spending was low.
<br><br>
* We clearly see from the graph that as vaccination picks up starting February 2021, the prices of these stocks continue to rise. This shows that vaccination instilled confidence in people, who began moving back to their pre-Covid spending behaviour.
<br><br>
* With the second wave starting around July 2021, the stock prices again began to drop for Visa and Mastercard, while they did not show any growth for American Express, reflecting the spending slowdown by consumers and corporates.
