In [1]:
import pandas as pd
import pandas as pd
import numpy as np
import os
import glob
from datetime import date, datetime
import plotly.express as px
import ipywidgets as widgets
import IPython.display
from IPython.display import display, clear_output


# Data Cleaning / Pre-processing

In [2]:
# Read the state-level COVID-19 cases/deaths time series and convert the
# reporting date from text to datetime in a single chained expression.
raw_df = (
    pd.read_csv('United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv')
    .assign(submission_date=lambda frame: pd.to_datetime(frame['submission_date']))
)
raw_df.head()

Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,conf_death,prob_death,new_death,pnew_death,created_at,consent_cases,consent_deaths
0,2021-02-12,UT,359641,359641.0,0.0,1060,0.0,1785,1729.0,56.0,11,2.0,02/13/2021 02:50:08 PM,Agree,Agree
1,2021-03-01,CO,438745,411869.0,26876.0,677,60.0,5952,5218.0,734.0,1,0.0,03/01/2021 12:00:00 AM,Agree,Agree
2,2020-08-22,AR,56199,,,547,0.0,674,,,11,0.0,08/23/2020 02:15:28 PM,Not agree,Not agree
3,2020-08-12,AS,0,,,0,0.0,0,,,0,0.0,08/13/2020 02:12:28 PM,,
4,2020-06-05,HI,661,,,8,0.0,17,,,0,0.0,06/06/2020 10:31:37 AM,Not agree,Not agree


In [3]:
# Order the rows chronologically (ascending row sort is the default) and
# rebind the name rather than mutating the frame in place.
raw_df = raw_df.sort_values(by='submission_date')

In [4]:
# Give the columns used by the rest of the notebook human-readable names.
raw_df = raw_df.rename(
    columns=dict(
        submission_date="Date",
        tot_cases="Total Cases",
        new_case="Daily New Cases",
        state="State",
    )
)

In [5]:
# Preview the first five rows of the raw frame.
raw_df.head(n=5)

Unnamed: 0,Date,State,Total Cases,conf_cases,prob_cases,Daily New Cases,pnew_case,tot_death,conf_death,prob_death,new_death,pnew_death,created_at,consent_cases,consent_deaths
9539,2020-01-22,WI,0,,,0,,0,,,0,,03/26/2020 04:22:39 PM,Agree,Agree
6471,2020-01-22,RMI,0,,,0,,0,,,0,,03/26/2020 04:22:39 PM,Agree,Agree
14034,2020-01-22,ID,0,,,0,,0,,,0,,03/26/2020 04:22:39 PM,Agree,Agree
20347,2020-01-22,ND,0,,,0,,0,,,0,,03/26/2020 04:22:39 PM,Agree,Not agree
13260,2020-01-22,MT,0,,,0,,0,,,0,,03/26/2020 04:22:39 PM,Agree,Agree


In [6]:
# Keep only the columns needed for the state-level case analysis.
state_df = raw_df.loc[:, ['Date', 'State', 'Total Cases', 'Daily New Cases']]

In [7]:
# Preview the first five rows of the state-level frame.
state_df.head(n=5)

Unnamed: 0,Date,State,Total Cases,Daily New Cases
9539,2020-01-22,WI,0,0
6471,2020-01-22,RMI,0,0
14034,2020-01-22,ID,0,0
20347,2020-01-22,ND,0,0
13260,2020-01-22,MT,0,0


In [8]:
# Count the non-null entries per column for every reporting date.
state_df.groupby('Date').count()

Unnamed: 0_level_0,State,Total Cases,Daily New Cases
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-22,60,60,60
2020-01-23,60,60,60
2020-01-24,60,60,60
2020-01-25,60,60,60
2020-01-26,60,60,60
...,...,...,...
2021-11-17,60,60,60
2021-11-18,60,60,60
2021-11-19,60,60,60
2021-11-20,60,60,60


In [9]:
# Collapse the per-state rows into one national row per date by summing the
# cumulative and daily case counts; "Date" stays a regular column.
country_df = (
    raw_df
    .groupby('Date', as_index=False)[['Total Cases', 'Daily New Cases']]
    .sum()
)

In [10]:
# Preview the first five rows of the national frame.
country_df.head(n=5)

Unnamed: 0,Date,Total Cases,Daily New Cases
0,2020-01-22,0,0
1,2020-01-23,1,1
2,2020-01-24,2,1
3,2020-01-25,2,0
4,2020-01-26,3,1


In [11]:
# Sanity check: does any column of the national frame contain missing values?
country_df.isna().any()

Date               False
Total Cases        False
Daily New Cases    False
dtype: bool

In [12]:
# Sanity check: is any national cumulative case count negative?
country_df['Total Cases'].lt(0).to_numpy().any()

False

In [13]:
# Sanity check: is any national daily new-case count negative?
country_df['Daily New Cases'].lt(0).to_numpy().any()

False

In [None]:
# Persist the national daily series. to_csv does not create missing folders,
# so make sure the output directory exists before writing (no-op if it does).
os.makedirs('Data', exist_ok=True)
country_df.to_csv('Data/usa_national_level_daily_new_covid_cases.csv', index=False)

In [None]:
# Persist the state-level daily series. to_csv does not create missing folders,
# so make sure the output directory exists before writing (no-op if it does).
os.makedirs('Data', exist_ok=True)
state_df.to_csv('Data/usa_state_level_daily_new_covid_cases.csv', index=False)

In [None]:
# Persist the sorted/renamed raw frame. to_csv does not create missing folders,
# so create the nested output directory first (no-op if it already exists).
os.makedirs('Data/source', exist_ok=True)
raw_df.to_csv('Data/source/raw_daily_state_level_covid_cases.csv', index=False)

In [19]:
# Largest single-day new-case count reported by any one state.
state_df.loc[:, 'Daily New Cases'].max()

61016

# EDA

## Progression of COVID-19 cases over time across the US

In [24]:
# Line chart of the national daily new-case series.
fig = px.line(
    data_frame=country_df,
    x="Date",
    y="Daily New Cases",
    title="Covid cases reported across the US",
)
fig.show()

## State-wise analysis of COVID-19 cases

In [14]:
# Sorted array of the distinct state/territory codes present in the data.
states = np.unique(state_df['State'].to_numpy())
states

array(['AK', 'AL', 'AR', 'AS', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL',
       'FSM', 'GA', 'GU', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA',
       'MA', 'MD', 'ME', 'MI', 'MN', 'MO', 'MP', 'MS', 'MT', 'NC', 'ND',
       'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'NYC', 'OH', 'OK', 'OR', 'PA',
       'PR', 'PW', 'RI', 'RMI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VI',
       'VT', 'WA', 'WI', 'WV', 'WY'], dtype=object)

In [15]:
# Initial selections: the first code is the primary state, the second the
# comparison state.
prov, comp_prov = states[0], states[1]

In [21]:
# Two dropdowns over the same sorted list of state codes: one picks the
# primary state, the other the state it is compared against.
dropdown_state = widgets.Dropdown(
    description='Primary State:',
    options=sorted(states),
    value=prov,
)
dropdown_comp_state = widgets.Dropdown(
    description='Comparison State:',
    options=sorted(states),
    value=comp_prov,
)

In [27]:
# Title shared by every state-comparison figure.
STATE_PLOT_TITLE = "Number of covid cases recorded Across Various Regions in the US"

# Upper y-axis limit: one above the largest daily count reported by any state.
# Derived from the data instead of being hard-coded, so it stays correct if
# the input file changes.
STATE_PLOT_Y_MAX = int(state_df["Daily New Cases"].max()) + 1


def _order_states_for_highlight(states_frame, primary, comparison=None):
    """
    Return a copy of ``states_frame`` sorted so the highlighted states come last.

    An ``order`` column is added: 1 for ``primary``, 2 for ``comparison``
    (when given) and the next rank for every other state. Sorting descending
    on ``order`` (then ``Date``) places the non-highlighted states first, so
    their traces are created first and the highlighted ones after them.
    """
    if comparison is None:
        ranks, other_rank = {primary: 1}, 2
    else:
        ranks, other_rank = {primary: 1, comparison: 2}, 3

    ordered = states_frame.copy()
    ordered["order"] = ordered["State"].map(ranks).fillna(other_rank)
    return ordered.sort_values(by=["order", "Date"], ascending=False)


def _build_state_comparison_figure(ordered_frame, primary, comparison=None,
                                   y_max=STATE_PLOT_Y_MAX):
    """
    Build the per-state line chart with the selected states highlighted.

    Every state is first drawn in light grey; ``primary`` is then recoloured
    blue and ``comparison`` (when given) red. The y-axis is fixed to
    ``[0, y_max]`` so the scale does not change between selections.
    """
    figure = px.line(ordered_frame,
                     x="Date",
                     y="Daily New Cases",
                     color="State",
                     labels={
                         "Date": "Reported Date",
                         "Daily New Cases": "Number of new covid cases recorded"
                     },
                     width=800, height=700,
                     title=STATE_PLOT_TITLE)

    # Grey out everything, then recolour only the selected traces.
    figure.update_traces({"line": {"color": "lightgrey", "width": 2}})
    figure.update_traces(patch={"line": {"color": "blue", "width": 3}},
                         selector={"legendgroup": primary})
    if comparison is not None:
        figure.update_traces(patch={"line": {"color": "red", "width": 3}},
                             selector={"legendgroup": comparison})

    figure.update_layout(title_text=STATE_PLOT_TITLE, title_x=0.5,
                         showlegend=True,
                         yaxis_range=[0, y_max],
                         yaxis={"visible": True})
    return figure


def _redraw_state_comparison():
    """Re-display the dropdowns and the figure for the current selections."""
    display(input_widgets)
    ordered = _order_states_for_highlight(state_df, prov, comp_prov)
    _build_state_comparison_figure(ordered, prov, comp_prov).show()
    # wait=True defers the clear until new output arrives, so what was just
    # drawn stays visible until the next redraw replaces it.
    IPython.display.clear_output(wait=True)


# Initial figure for the default selections.
sorted_df = _order_states_for_highlight(state_df, prov, comp_prov)
fig = _build_state_comparison_figure(sorted_df, prov, comp_prov)


def dropdown_state_eventhandler(change):
    """
    Eventhandler for the primary-state dropdown widget.

    Stores the newly selected value (``change.new``) in the global ``prov``
    and redraws the comparison figure.
    """
    global prov, comp_prov
    prov = change.new
    _redraw_state_comparison()


def dropdown_comp_state_eventhandler(change):
    """
    Eventhandler for the comparison-state dropdown widget.

    Stores the newly selected value (``change.new``) in the global
    ``comp_prov`` and redraws the comparison figure.
    """
    global prov, comp_prov
    comp_prov = change.new
    _redraw_state_comparison()


# Redraw whenever either dropdown's selected value changes.
dropdown_state.observe(dropdown_state_eventhandler, names='value')
dropdown_comp_state.observe(dropdown_comp_state_eventhandler, names='value')

input_widgets = widgets.VBox([dropdown_state, dropdown_comp_state])
display(input_widgets)
fig.show()
IPython.display.clear_output(wait=True)


VBox(children=(Dropdown(description='Primary State:', index=10, options=('AK', 'AL', 'AR', 'AS', 'AZ', 'CA', '…