In [4]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import plotly.express as px
import plotly
import plotly.graph_objs as go

In [5]:
# Load the combined ("super") Covid-19 table; the first CSV column is used as
# the row index.
# NOTE(review): the earlier comment said that index is the state — confirm
# against the CSV, since later cells read 'State' as a regular column.
USCovid = pd.read_csv(
    "../../../data/output/covid.csv",
    index_col=0,
)

### Clean Data

#### Drop unnecessary data

In [6]:
# Clean
# Drop bookkeeping columns that the analysis does not use. errors='ignore'
# skips any of these labels that are not present in the frame.
USCovid = USCovid.drop(columns=["Unnamed: 0", "countyFIPS", "stateFIPS"],
                       errors='ignore')

# Remove the "Statewide Unallocated" rows with a boolean mask rather than by
# index label: a label-based drop also removes every other row that happens
# to share an index label with a matched row when the index is not unique.
USCovid = USCovid.loc[USCovid['County Name'] != "Statewide Unallocated"]

# Column selectors: date-like labels ending in "_x" (cases) or "_y" (deaths),
# plus the three descriptive columns shared by both tables.
regex_cases = '(^[0-9]+[/]+[0-9]+[/]+[0-9]+[_]+[x])|^County Name$|^State$|^population$'
regex_deaths = '(^[0-9]+[/]+[0-9]+[/]+[0-9]+[_]+[y])|^County Name$|^State$|^population$'
USCases = USCovid.filter(regex=regex_cases)
USDeaths = USCovid.filter(regex=regex_deaths)

#### Find new cases

In [7]:
def diff(df, keep=4):
    """Convert a running-total series into step-by-step increments.

    The first ``keep`` entries are copied through unchanged; every later entry
    is replaced by its difference from the entry immediately before it.

    Elements are read by position through ``.iloc``. Plain ``df[i]`` on a
    Series whose index is not integer-based depends on a positional fallback
    that pandas has deprecated, so it is avoided here.

    Parameters
    ----------
    df : pandas.Series
        Values to process, in order. Despite the name this is a Series (one
        column handed over by ``DataFrame.apply``), not a DataFrame.
    keep : int, default 4
        Number of leading entries to copy through untouched. Must be >= 1.
        NOTE(review): the default presumably covers three descriptive fields
        plus the first cumulative value — confirm against the column order.

    Returns
    -------
    pandas.Series
        Same length as ``df``, with a fresh default integer index (the labels
        of ``df`` are not carried over).

    Raises
    ------
    ValueError
        If ``keep`` is smaller than 1.
    """
    if keep < 1:
        raise ValueError("keep must be at least 1")
    total = df.shape[0]
    # Inputs shorter than ``keep`` are copied through as they are.
    head = min(keep, total)
    new = [df.iloc[i] for i in range(head)]
    new.extend(df.iloc[i] - df.iloc[i - 1] for i in range(head, total))
    return pd.Series(new)

In [None]:
# Keep the original column labels: diff() returns each row with a plain
# integer index, so the labels have to be restored after it has run.
columns_D = USDeaths.columns
columns_C = USCases.columns

# Run diff() over every row: transpose so rows become columns, apply
# column-wise, then transpose back.
USDeaths = (
    USDeaths.reset_index(drop=True)
    .transpose()
    .apply(diff)
    .transpose()
)
USDeaths.columns = columns_D

USCases = (
    USCases.reset_index(drop=True)
    .transpose()
    .apply(diff)
    .transpose()
)
USCases.columns = columns_C

#### Convert date type from String to datetime Timestamp

In [None]:
# Sum the rows of each state into a single row, then shorten every column
# label to the text in front of its first underscore.
USDeaths_byStates = (
    USDeaths.groupby('State')
    .sum()
    .rename(columns=lambda label: label.partition('_')[0], errors='raise')
)
USCases_byStates = (
    USCases.groupby('State')
    .sum()
    .rename(columns=lambda label: label.partition('_')[0], errors='raise')
)

In [None]:
# Temporarily separate the descriptive columns (County Name, State,
# population) from the remaining per-date columns of each per-state table.
USDeaths_byStates_SP = USDeaths_byStates.reset_index().loc[
    :, ['County Name', 'State', 'population']]
USDeaths_byStates_Date = USDeaths_byStates.reset_index().drop(
    columns=['County Name', 'State', 'population'])

USCases_byStates_SP = USCases_byStates.reset_index().loc[
    :, ['County Name', 'State', 'population']]
USCases_byStates_Date = USCases_byStates.reset_index().drop(
    columns=['County Name', 'State', 'population'])

In [None]:
# Convert String to TimeStamp
def _to_timestamp(label):
    """Parse ``label`` as a date, returning it unchanged if parsing fails.

    This keeps the best-effort behaviour of
    ``pd.to_datetime(label, errors='ignore')`` without using the
    ``errors='ignore'`` option, which pandas has deprecated in favour of
    catching the exception explicitly.
    """
    try:
        return pd.to_datetime(label)
    except (ValueError, TypeError):
        return label


# Relabel every column of the date-only tables with its parsed Timestamp.
USDeaths_byStates_Date = USDeaths_byStates_Date.rename(
    columns=_to_timestamp,
    errors='raise')

USCases_byStates_Date = USCases_byStates_Date.rename(
    columns=_to_timestamp,
    errors='raise')

##### Resample dates from days to weeks and take the mean of each week

In [None]:
# Average the daily columns into weekly ('W') bins and round each mean up to
# a whole number. The dates sit on the column axis, so the frame is
# transposed, resampled along its (now datetime) index and transposed back;
# this replaces resample(..., axis='columns'), whose axis argument is
# deprecated in pandas.
USDeaths_byStates_Date_weeks = np.ceil(
    USDeaths_byStates_Date.T.resample('W').mean().T)
USCases_byStates_Date_weeks = np.ceil(
    USCases_byStates_Date.T.resample('W').mean().T)

In [None]:
# Re-attach the descriptive columns to the weekly figures, matching rows on
# the index of both tables.
USDeaths_States_weeks = USDeaths_byStates_SP.merge(
    USDeaths_byStates_Date_weeks,
    left_index=True,
    right_index=True,
)
USCases_States_weeks = USCases_byStates_SP.merge(
    USCases_byStates_Date_weeks,
    left_index=True,
    right_index=True,
)

#### Find Mean, Median and Mode for the weeks

In [None]:
def _summarise_weekly(states, weekly):
    """Compute per-row mean, median and mode of a weekly table.

    Parameters
    ----------
    states : pandas.Series
        State labels, sharing the row index of ``weekly``.
    weekly : pandas.DataFrame
        One row per state and one column per week.

    Returns
    -------
    tuple
        ``(means, median, mode_rows, parts, results)`` where ``means`` and
        ``median`` are row-wise Series, ``mode_rows`` is the list-of-rows form
        of the mode table, ``parts`` is the list of the four result columns
        and ``results`` is a DataFrame with the columns
        'State', 'Mean', 'Median' and 'Mode'.
    """
    means = weekly.mean(axis=1)
    median = weekly.median(axis=1)
    # Each row of the transposed mode table holds one mode per state; only
    # the first row (the first-listed mode of every state) is reported.
    mode_rows = weekly.T.mode().values.tolist()
    parts = [states, means, median, pd.Series(mode_rows[0])]
    # Build the table from a column mapping. Stacking the Series as rows and
    # transposing mixes strings with numbers in every row, which leaves all
    # four result columns with object dtype.
    results = pd.DataFrame(dict(zip(['State', 'Mean', 'Median', 'Mode'], parts)))
    return means, median, mode_rows, parts, results


# Deaths
(USDeaths_means, USDeaths_median, USDeaths_mode,
 USDeaths_list, USDeaths_results) = _summarise_weekly(
    USDeaths_byStates_SP['State'], USDeaths_byStates_Date_weeks)

# Cases
(USCases_means, USCases_median, USCases_mode,
 USCases_list, USCases_results) = _summarise_weekly(
    USCases_byStates_SP['State'], USCases_byStates_Date_weeks)

#### Plot data

In [None]:
# Line-and-marker chart of the 'Mean' column of the cases table, one point
# per state.
trace0 = go.Scatter(
    x=USCases_results['State'],
    y=USCases_results['Mean'],
    name="US",
    mode="lines+markers",
)
mydata = [trace0]
mylayout = go.Layout(title="Weekly Cases")

fig = go.Figure(data=mydata, layout=mylayout)

# Render the figure inline in the notebook.
plotly.offline.iplot(fig, filename='')

In [None]:
# Line-and-marker chart of the 'Mean' column of the deaths table, one point
# per state.
trace0 = go.Scatter(
    x=USDeaths_results['State'],
    y=USDeaths_results['Mean'],
    name="US",
    mode="lines+markers",
)
mydata = [trace0]
mylayout = go.Layout(title="Weekly Deaths")

fig = go.Figure(data=mydata, layout=mylayout)

# Render the figure inline in the notebook.
plotly.offline.iplot(fig, filename='')