# Stage II Member Tasks

### Import libraries

In [7]:
import os
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import plotly.express as px
import plotly
import plotly.graph_objs as go
import scipy.stats as stats
import plotly.tools as tls
from IPython.display import display
import chart_studio.plotly as py
from plotly.graph_objs import Bar, Scatter, Marker, Layout 
from plotly.offline import iplot

### Read super Covid-19 and Enrichment data

In [8]:
#read super Covid-19 dataframe with state as index. 
USCovid = pd.read_csv("../../../data/output/covid.csv", index_col=0)
Bed = pd.read_csv("../../../data/output/COVID19_HOSBEDS_MERGE.csv", index_col=0)

# to save static images for Plotly
if not os.path.exists("images"):
    os.mkdir("images")

### Clean Data
Drop unnecessary data

In [9]:
# Clean
USCovid.drop(["Unnamed: 0", "countyFIPS", "stateFIPS"],
             axis=1,
             errors='ignore',
             inplace=True)
USCovid.drop(
    Bed.loc[USCovid['County Name'] == ("Statewide Unallocated" or 'New York City Unallocated')].index,
    inplace=True)

Bed.drop(["Unnamed: 0", "countyFIPS", "stateFIPS"],
             axis=1,
             errors='ignore',
             inplace=True)
Bed.drop(
    Bed.loc[Bed['County Name'] == ("Statewide Unallocated" or 'New York City Unallocated')].index,
    inplace=True)
Bed.reset_index(drop=True, inplace=True)
regex_cases = '(^[0-9]+[/]+[0-9]+[/]+[0-9]+[_]+[x])|^County Name$|^State$|^population$'
regex_deaths = '(^[0-9]+[/]+[0-9]+[/]+[0-9]+[_]+[y])|^County Name$|^State$|^population$'
USCases = USCovid.filter(regex=regex_cases)
USDeaths = USCovid.filter(regex=regex_deaths)

#### Calculate the daily new cases/death

In [10]:
def diff(df):
    new = [df[0], df[1], df[2], df[3]]
    for i in range(4, df.shape[0]):
        new.append(df[i] - df[i - 1])
    return pd.Series(new)

In [11]:
columns_D = USDeaths.columns
columns_C = USCases.columns

In [12]:
County_State_Pop_columns = USDeaths[USDeaths.columns[0:3]]
County_State_Pop_columns = USDeaths[USDeaths.columns[0:3]]

USNew_Deaths = County_State_Pop_columns.merge(USDeaths[USDeaths.columns[3:]].diff(axis=1).fillna(0), left_index =True, right_index=True)
USNew_Cases = County_State_Pop_columns.merge(USCases[USCases.columns[3:]].diff(axis=1).fillna(0), left_index =True, right_index=True)

#### Convert date type from String to datetime Timestamp

In [13]:
# fixing date string syntax
USNew_Deaths = USNew_Deaths.rename(
    columns=lambda x: (pd.to_datetime((x.split('_')[0]), errors='ignore')),
    errors='raise')


USNew_Cases = USNew_Cases.rename(
    columns=lambda x: (pd.to_datetime((x.split('_')[0]), errors='ignore')),
    errors='raise')

In [14]:
US_New_Cases = USNew_Cases
US_New_Death = USNew_Deaths

In [15]:
USDeaths_byStates = USNew_Deaths.groupby('State').sum()
USCases_byStates = USNew_Cases.groupby('State').sum()

In [16]:
# Split States and population from the data (temparary)
USDeaths_byStates_SP = USDeaths_byStates.reset_index()[['State', 'population']]
USDeaths_byStates_Date = USDeaths_byStates.reset_index().drop(
    ['State', 'population'], axis=1)

USCases_byStates_SP = USCases_byStates.reset_index()[['State', 'population']]
USCases_byStates_Date = USCases_byStates.reset_index().drop(
    [ 'State', 'population'], axis=1)

### Normalize the data by 1M population

In [17]:
norm_val =1_000_000

x = USDeaths_byStates_Date.astype(float).divide(USDeaths_byStates_SP['population'], axis=0)
USDeaths_byStates_Date_norm = x.multiply(norm_val)

y = USCases_byStates_Date.astype(float).divide(USCases_byStates_SP['population'], axis=0)
USCases_byStates_Date_norm = y.multiply(norm_val)

In [18]:
# Convert String to TimeStamp
USDeaths_byStates_Date_norm = USDeaths_byStates_Date_norm.rename(
    columns=lambda x: (pd.to_datetime(x, errors='ignore')),
    errors='raise')

USCases_byStates_Date_norm = USCases_byStates_Date_norm.rename(
    columns=lambda x: (pd.to_datetime(x, errors='ignore')),
    errors='raise')

##### Resample date from days to weeks and find the mean of week

In [19]:
USDeaths_byStates_Date_weeks_norm = USDeaths_byStates_Date_norm.resample('W', axis=1).mean().round()
USCases_byStates_Date_weeks_norm = USCases_byStates_Date_norm.resample('W', axis=1).mean().round()

In [20]:
USDeaths_States_weeks_norm = pd.merge(USDeaths_byStates_SP, USDeaths_byStates_Date_weeks_norm, left_index=True, right_index=True)
USCases_States_weeks_norm  = pd.merge(USCases_byStates_SP, USCases_byStates_Date_weeks_norm, left_index=True, right_index=True)

#### Find Means, Meidan and Mode for the weeks

In [21]:
# Deaths
USDeaths_means_norm = USDeaths_byStates_Date_weeks_norm.T.mean().round()
USDeaths_median_norm = USDeaths_byStates_Date_weeks_norm.T.median().round()
USDeaths_mode_norm = USDeaths_byStates_Date_weeks_norm.T.mode().round()
USDeaths_mode_norm = USDeaths_mode_norm.values.tolist()
USDeaths_list = [
     USDeaths_byStates_SP['State'], USDeaths_means_norm, USDeaths_median_norm, pd.Series(USDeaths_mode_norm[0])
]

USDeaths_results_norm = pd.DataFrame(USDeaths_list).transpose()
USDeaths_results_norm.columns = ['State', 'Mean', 'Median', 'Mode']

# Cases
USCases_means_norm = USCases_byStates_Date_weeks_norm.T.mean().round()
USCases_median_norm = USCases_byStates_Date_weeks_norm.T.median().round()
USCases_mode_norm = USCases_byStates_Date_weeks_norm.T.mode().round()
USCases_mode_norm = USCases_mode_norm.values.tolist()
USCases_list = [
     USCases_byStates_SP['State'], USCases_means_norm, USCases_median_norm, pd.Series(USCases_mode_norm[0])
]

USCases_results_norm = pd.DataFrame(USCases_list).transpose()
USCases_results_norm.columns = ['State', 'Mean', 'Median', 'Mode']

#### Plot data

In [70]:
trace0 = go.Scatter(x=USCases_results_norm.loc[0:10]['State'], y=USCases_results_norm.loc[0:10]['Mean'], 
                    name="Mean", 
                    mode="lines+markers") 
trace1 = go.Scatter(x=USCases_results_norm.loc[0:10]['State'], y=USCases_results_norm.loc[0:10]['Median'], 
                    name="Median", 
                    mode="lines+markers") 
trace2 = go.Scatter(x=USCases_results_norm.loc[0:10]['State'], y=USCases_results_norm.loc[0:10]['Mode'], 
                    name="Mode", 
                    mode="lines+markers") 
mydata = [trace0, trace1, trace2]

mylayout = go.Layout(
    title="States Weekly Cases"
)

fig = go.Figure(data=mydata, layout=mylayout)

plotly.offline.iplot(fig, filename = '')

fig.write_image("images/fig1.png")


In [71]:
trace0 = go.Scatter(x=USDeaths_results_norm.loc[0:10]['State'], y=USDeaths_results_norm.loc[0:10]['Mean'], 
                    name="Mean", 
                    mode="lines+markers") 
trace1 = go.Scatter(x=USDeaths_results_norm.loc[0:10]['State'], y=USDeaths_results_norm['Median'], 
                    name="Median", 
                    mode="lines+markers") 
trace2 = go.Scatter(x=USDeaths_results_norm.loc[0:10]['State'], y=USDeaths_results_norm.loc[0:10]['Mode'], 
                    name="Mode", 
                    mode="lines+markers") 
mydata = [trace0, trace1, trace2]

mylayout = go.Layout(
    title="States Weekly Deaths"
)

fig = go.Figure(data=mydata, layout=mylayout)

plotly.offline.iplot(fig, filename = '')
fig.write_image("images/fig2.png")


####  

### Counites

In [79]:
FL_NewDeaths = USNew_Deaths[USNew_Deaths['State'] == 'FL']
FL_NewCases = USNew_Cases[USNew_Cases['State'] == 'FL']

zer = FL_NewCases[FL_NewCases.population != 0]
FL_NewCases_SP = zer.reset_index()[['County Name','State', 'population']]
FL_NewCases_Date = zer.reset_index().drop(
    ['County Name', 'State', 'population', 'index'], axis=1)

zer1 = FL_NewDeaths[FL_NewDeaths.population != 0]
FL_NewDeaths_SP = zer1.reset_index()[['County Name','State', 'population']]
FL_NewDeaths_Date = zer1.reset_index().drop(
    ['County Name', 'State', 'population', 'index'], axis=1)

### Normalize the data

In [25]:
norm_val = 1_000
norm_val2 = 10_000
x = FL_NewCases_Date.divide(FL_NewCases_SP['population'], axis=0)
FL_NewCases_Date_norm = x.multiply(norm_val).round()

y = FL_NewDeaths_Date.astype(float).divide(FL_NewDeaths_SP['population'], axis=0)
FL_NewDeaths_Date_norm = y.multiply(norm_val2).round()

In [26]:
_NewCases = FL_NewCases_SP.merge(pd.DataFrame(FL_NewCases_Date_norm.sum(axis=1)), left_index=True, right_index=True)
_NewDeaths = FL_NewDeaths_SP.merge(pd.DataFrame(FL_NewDeaths_Date_norm.sum(axis=1)), left_index=True, right_index=True)

### finding the top 5 counties cases and death

In [27]:
# find the top 5 counites
FL_NewCases_top5 = FL_NewCases_SP.merge(pd.DataFrame(FL_NewCases_Date_norm.sum(axis=1)), left_index=True, right_index=True)
FL_NewCases_top5.rename(columns={0: 'Normalized Cases'}, inplace=True)

FL_NewDeaths_top5 = FL_NewDeaths_SP.merge(pd.DataFrame(FL_NewDeaths_Date_norm.sum(axis=1)), left_index=True, right_index=True)
FL_NewDeaths_top5.rename(columns={0: 'Normalized Deaths'}, inplace=True)

In [28]:
FL_NewCases_top5 = FL_NewCases_top5.nlargest(5, ['Normalized Cases'])
FL_NewDeaths_top5 = FL_NewDeaths_top5.nlargest(5, ['Normalized Deaths'])

In [29]:
FL_NewCases_top5.head()

Unnamed: 0,County Name,State,population,Normalized Cases
32,Lafayette County,FL,8422,138.0
62,Union County,FL,15237,68.0
21,Gulf County,FL,13639,62.0
37,Liberty County,FL,8354,53.0
18,Gadsden County,FL,45660,52.0


In [30]:
FL_NewDeaths_top5.head()

Unnamed: 0,County Name,State,population,Normalized Deaths
62,Union County,FL,15237,23.0
38,Madison County,FL,18493,21.0
32,Lafayette County,FL,8422,17.0
19,Gilchrist County,FL,18582,15.0
6,Calhoun County,FL,14105,14.0


In [31]:
FL_NewCases_top5_daily = FL_NewCases_SP.merge(FL_NewCases_Date_norm, left_index=True, right_index=True)
FL_NewCases_top5_daily =FL_NewCases[FL_NewCases['County Name'].isin(['Lafayette County', 'Union County', 'Gulf County', 'Liberty County', 'Gadsden County'])]

FL_NewDeaths_top5_daily = FL_NewDeaths_SP.merge(FL_NewDeaths_Date_norm, left_index=True, right_index=True)
FL_NewDeaths_top5_daily =FL_NewDeaths[FL_NewDeaths['County Name'].isin(['Union County', 'Madison County', 'Lafayette County', 'Gilchrist County', 'Calhoun County'])]

FL_NewDeaths_top5_daily.head()

Unnamed: 0,County Name,State,population,2020-01-22 00:00:00,2020-01-23 00:00:00,2020-01-24 00:00:00,2020-01-25 00:00:00,2020-01-26 00:00:00,2020-01-27 00:00:00,2020-01-28 00:00:00,...,2020-10-07 00:00:00,2020-10-08 00:00:00,2020-10-09 00:00:00,2020-10-10 00:00:00,2020-10-11 00:00:00,2020-10-12 00:00:00,2020-10-13 00:00:00,2020-10-14 00:00:00,2020-10-15 00:00:00,2020-10-16 00:00:00
337,Calhoun County,FL,14105,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
350,Gilchrist County,FL,18582,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
363,Lafayette County,FL,8422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
369,Madison County,FL,18493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
393,Union County,FL,15237,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0


In [32]:
# Cases
Lafayette = FL_NewCases_top5_daily[(FL_NewCases_top5_daily['County Name'] == 'Lafayette County')][FL_NewCases_top5_daily.columns[3:]].T
Union = FL_NewCases_top5_daily[(FL_NewCases_top5_daily['County Name'] == 'Union County')][FL_NewCases_top5_daily.columns[3:]].T
Gulf = FL_NewCases_top5_daily[(FL_NewCases_top5_daily['County Name'] == 'Gulf County')][FL_NewCases_top5_daily.columns[3:]].T
Liberty =FL_NewCases_top5_daily[(FL_NewCases_top5_daily['County Name'] == 'Liberty County')][FL_NewCases_top5_daily.columns[3:]].T
Gadsden = FL_NewCases_top5_daily[(FL_NewCases_top5_daily['County Name'] == 'Gadsden County')][FL_NewCases_top5_daily.columns[3:]].T

# Deaths
Union_Deaths = FL_NewDeaths_top5_daily[(FL_NewDeaths_top5_daily['County Name'] == 'Union County')][FL_NewDeaths_top5_daily.columns[3:]].T
Madison_Deaths = FL_NewDeaths_top5_daily[(FL_NewDeaths_top5_daily['County Name'] == 'Madison County')][FL_NewDeaths_top5_daily.columns[3:]].T
Lafayette_Deaths = FL_NewDeaths_top5_daily[(FL_NewDeaths_top5_daily['County Name'] == 'Lafayette County')][FL_NewDeaths_top5_daily.columns[3:]].T
Gilchrist_Deaths =FL_NewDeaths_top5_daily[(FL_NewDeaths_top5_daily['County Name'] == 'Gilchrist County')][FL_NewDeaths_top5_daily.columns[3:]].T
Calhoun_Deaths = FL_NewDeaths_top5_daily[(FL_NewDeaths_top5_daily['County Name'] == 'Calhoun County')][FL_NewDeaths_top5_daily.columns[3:]].T

#### Plot the data

In [72]:
trace0 = go.Scatter(x=Lafayette.index, y=Lafayette.iloc[:,0], 
                    name="Lafayette", 
                    mode="lines+markers") 
trace1 = go.Scatter(x=Union.index, y=Union.iloc[:,0], 
                    name="Union", 
                    mode="lines+markers") 
trace2 = go.Scatter(x=Gulf.index, y=Gulf.iloc[:,0], 
                    name="Gulf", 
                    mode="lines+markers") 
trace3 = go.Scatter(x=Liberty.index, y=Liberty.iloc[:,0], 
                    name="Liberty", 
                    mode="lines+markers") 
trace4 = go.Scatter(x=Gadsden.index, y=Gadsden.iloc[:,0], 
                    name="Gadsden", 
                    mode="lines+markers") 

mydata = [trace0, trace1, trace2, trace3, trace4]

mylayout = go.Layout(
    title="Top 5 counites daily cases"
)

fig = go.Figure(data=mydata, layout=mylayout)

plotly.offline.iplot(fig, filename = '')
fig.write_image("images/fig3.png")

In [73]:
trace0 = go.Scatter(x=Union_Deaths.index, y=Union_Deaths.iloc[:,0], 
                    name="Union", 
                    mode="lines+markers") 
trace1 = go.Scatter(x=Madison_Deaths.index, y=Madison_Deaths.iloc[:,0], 
                    name="Madison", 
                    mode="lines+markers") 
trace2 = go.Scatter(x=Lafayette_Deaths.index, y=Lafayette_Deaths.iloc[:,0], 
                    name="Lafayette", 
                    mode="lines+markers") 
trace3 = go.Scatter(x=Gilchrist_Deaths.index, y=Gilchrist_Deaths.iloc[:,0], 
                    name="Gilchrist", 
                    mode="lines+markers") 
trace4 = go.Scatter(x=Calhoun_Deaths.index, y=Calhoun_Deaths.iloc[:,0], 
                    name="Calhoun", 
                    mode="lines+markers") 

mydata = [trace0, trace1, trace2, trace3, trace4]

mylayout = go.Layout(
    title="Top 5 counites daily deaths"
)

fig = go.Figure(data=mydata, layout=mylayout)

plotly.offline.iplot(fig, filename = '')
fig.write_image("images/fig4.png")


### Task 2

Fit a distribution to the number of COVID-19 cases of a state.
- Graphically plot the distribution and describe the distribution statistics.

#### preform poisson distribution

In [63]:
# poisson function
def poisson(size, mu):
    s = []
    for i in range(size[0], size[1]):
        s.append(stats.poisson.pmf(k=i, mu=mu))
    return pd.Series(s)

In [64]:
size = (50, 200)
size2 = (0, 60)

# Cases
FL_poisson = poisson(size, USCases_results_norm.iloc[9]['Mean'])
df = pd.DataFrame(FL_poisson)
df.index = df.index + size[0]

# Deaths
FL_Deaths_poisson = poisson(size2, USDeaths_results_norm.iloc[9]['Mean'])
df_deaths = pd.DataFrame(FL_Deaths_poisson)

In [74]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df[0], fill='tozeroy', name='FL'))

fig.update_layout(
    title="Fitting Florida Covid-19 Cases To Poisson Distribution",
    xaxis_title="Number of Cases",
    yaxis_title="Probability of occurring",
    legend_title="States",
)
fig.show()
fig.write_image("images/fig5.png")


- Describe why the distribution was choosen and its statistics in the report and the notebook.

    Poisson distribution is the best distribution that we can use to distribute the number of COVID-19 cases of a state. The reason why is that is the case is because COVID-19 cases are discreat values.

    By using the function stats.poisson.pmf we can find the probability mass function for a giving mean and predected value.

#### Apply that to other states for comparison

In [66]:
DC_poisson = poisson(size, USCases_results_norm.iloc[7]['Mean'])
ND_poisson = poisson(size, USCases_results_norm.iloc[28]['Mean'])
NC_poisson = poisson(size, USCases_results_norm.iloc[27]['Mean'])
SD_poisson = poisson(size, USCases_results_norm.iloc[41]['Mean'])

DC_Deaths_poisson = poisson(size2, USDeaths_results_norm.iloc[7]['Mean'])
ND_Deaths_poisson = poisson(size2, USDeaths_results_norm.iloc[28]['Mean'])
NC_Deaths_poisson = poisson(size2, USDeaths_results_norm.iloc[27]['Mean'])
SD_Deaths_poisson = poisson(size2, USDeaths_results_norm.iloc[41]['Mean'])

In [67]:
# Cases
df = pd.DataFrame(FL_poisson)
df.index = df.index + size[0]

df2 = pd.DataFrame(DC_poisson)
df2.index = df2.index + size[0]

df3 = pd.DataFrame(ND_poisson)
df3.index = df3.index + size[0]

df4 = pd.DataFrame(NC_poisson)
df4.index = df4.index + size[0]

df5 = pd.DataFrame(SD_poisson)
df5.index = df5.index + size[0]
# Deaths
df_deaths = pd.DataFrame(FL_Deaths_poisson)
df2_deaths = pd.DataFrame(DC_Deaths_poisson)
df3_deaths = pd.DataFrame(ND_Deaths_poisson)
df4_deaths = pd.DataFrame(NC_Deaths_poisson)
df5_deaths = pd.DataFrame(SD_Deaths_poisson)


In [75]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df[0], fill='tozeroy', name='FL')) # fill down to xaxis
fig.add_trace(go.Scatter(x=df2.index, y=df2[0], fill='tozeroy', name='DC')) # fill to trace0 y
fig.add_trace(go.Scatter(x=df3.index, y=df3[0], fill='tozeroy', name='ND')) # fill to trace0 y
fig.add_trace(go.Scatter(x=df4.index, y=df4[0], fill='tozeroy', name='NC')) # fill to trace0 y
fig.add_trace(go.Scatter(x=df5.index, y=df5[0], fill='tozeroy', name='SD')) # fill to trace0 y

fig.update_layout(
    title="Compare Florida Covid-19 Cases With Other States",
    xaxis_title="Probability of occurring",
    yaxis_title="Number of Cases",
    legend_title="States",
)
fig.show()
fig.write_image("images/fig6.png")


In [76]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_deaths.index, y=df_deaths[0], fill='tozeroy', name='FL')) # fill down to xaxis
fig.add_trace(go.Scatter(x=df2_deaths.index, y=df2_deaths[0], fill='tozeroy', name='DC')) # fill to trace0 y
fig.add_trace(go.Scatter(x=df3_deaths.index, y=df3_deaths[0], fill='tozeroy', name='ND')) # fill to trace0 y
fig.add_trace(go.Scatter(x=df4_deaths.index, y=df4_deaths[0], fill='tozeroy', name='NC')) # fill to trace0 y
fig.add_trace(go.Scatter(x=df5_deaths.index, y=df5_deaths[0], fill='tozeroy', name='SD')) # fill to trace0 y
fig.update_layout(
    title="Compare Florida Covid-19 Deaths With Other States",
    xaxis_title="Probability of occurring",
    yaxis_title="Number of Deaths",
    legend_title="States",
)
fig.show()
fig.write_image("images/fig8.png")


Model a poission distribution of COVID-19 cases and deaths of a state and compare to other 5 states. Number of new cases and deaths per 1000 populations. (normalize by population)

#### Normalize the data

In [42]:
norm_val = 10_000_000

x = USDeaths_byStates_Date.astype(float).divide(USDeaths_byStates_SP['population'], axis=0)
USDeaths_States_Date_norm_10 = x.multiply(norm_val)

y = USCases_byStates_Date.astype(float).divide(USCases_byStates_SP['population'], axis=0)
USCases_States_Date_norm_10 = y.multiply(norm_val)

In [43]:
# Convert String to TimeStamp
USDeaths_States_Date_norm_10 = USDeaths_States_Date_norm_10.rename(
    columns=lambda x: (pd.to_datetime(x, errors='ignore')),
    errors='raise')

USCases_States_Date_norm_10 = USCases_States_Date_norm_10.rename(
    columns=lambda x: (pd.to_datetime(x, errors='ignore')),
    errors='raise')

In [44]:
USDeaths_States_Date_weeks_norm_10 = USDeaths_States_Date_norm_10.resample('W', axis=1).mean().round()
USCases_States_Date_weeks_norm_10 = USCases_States_Date_norm_10.resample('W', axis=1).mean().round()

In [45]:
USDeaths_States_weeks_norm_10 = pd.merge(USDeaths_byStates_SP, USDeaths_States_Date_weeks_norm_10, left_index=True, right_index=True)
USCases_States_weeks_norm_10 = pd.merge(USCases_byStates_SP, USCases_States_Date_weeks_norm_10, left_index=True, right_index=True)

In [46]:
FL = USCases_States_weeks_norm_10.loc[USCases_States_weeks_norm_10['State'] == 'FL'].iloc[0]
NC = USCases_States_weeks_norm_10.loc[USCases_States_weeks_norm_10['State'] == 'NC'].iloc[0]
NY = USCases_States_weeks_norm_10.loc[USCases_States_weeks_norm_10['State'] == 'NY'].iloc[0]
AL = USCases_States_weeks_norm_10.loc[USCases_States_weeks_norm_10['State'] == 'AL'].iloc[0]

Model poission distributions for North Carolina counties COVID-19 in cases and deaths. Number of new cases and deaths per 1000 populations.

#### Select NC to prefore some analysis

In [47]:
NC_NewDeaths = USNew_Deaths[USNew_Deaths['State'] == 'NC']
NC_NewCases = USNew_Cases[USNew_Cases['State'] == 'NC']

In [48]:
NC_NewCases = NC_NewCases.groupby('County Name').sum()

zer = NC_NewCases[NC_NewCases.population != 0]
NC_NewCases_SP = zer.reset_index()[['County Name', 'population']]
NC_NewCases_Date = zer.reset_index().drop(['County Name', 'population'],
                                          axis=1)


NC_NewDeaths = NC_NewDeaths.groupby('County Name').sum()

zer1 = NC_NewDeaths[NC_NewDeaths.population != 0]
NC_NewDeaths_SP = zer1.reset_index()[['County Name', 'population']]
NC_NewDeaths_Date = zer1.reset_index().drop(['County Name', 'population'],
                                            axis=1)

In [49]:
norm_val = 1_000_000
x = NC_NewCases_Date.divide(NC_NewCases_SP['population'], axis=0)
NC_NewCases_Date_norm = x.multiply(norm_val).round()

y = NC_NewDeaths_Date.divide(NC_NewDeaths_SP['population'], axis=0)
NC_NewDeaths_Date_norm = y.multiply(norm_val).round()

In [50]:
# Convert String to TimeStamp
NC_NewCases_Date_norm = NC_NewCases_Date_norm.rename(
    columns=lambda x: (pd.to_datetime(x, errors='ignore')),
    errors='raise')
NC_NewDeaths_Date_norm = NC_NewDeaths_Date_norm.rename(
    columns=lambda x: (pd.to_datetime(x, errors='ignore')),
    errors='raise')

In [51]:
NC_NewCases_week_norm = NC_NewCases_Date_norm.resample(
    'W', axis=1).mean().mean(axis=1).round()
NC_NewCases_norm_mean = pd.merge(NC_NewCases_SP,
                                 pd.DataFrame(NC_NewCases_week_norm,
                                              columns=['Mean']),
                                 left_index=True,
                                 right_index=True)

NC_NewDeaths_week_norm = NC_NewDeaths_Date_norm.resample(
    'W', axis=1).mean().mean(axis=1).round()
NC_NewDeaths_norm_mean = pd.merge(NC_NewDeaths_SP,
                                 pd.DataFrame(NC_NewDeaths_week_norm,
                                              columns=['Mean']),
                                 left_index=True,
                                 right_index=True)

#### preform Poisson distrbution for all NC counties

In [52]:
def calculate_counties_poisson(df, sizeNC = (0, 35)):
    new = pd.DataFrame()
    for i in range(len(df)):
        w = poisson(sizeNC, df.iloc[i]['Mean'])
        new = new.append(pd.DataFrame(w).T)
    new = new.reset_index().drop(['index'], axis=1)
    return new

In [53]:
NC_NewDeaths_norm_mean

Unnamed: 0,County Name,population,Mean
0,Alamance County,169509,1.0
1,Alexander County,37497,1.0
2,Alleghany County,11137,0.0
3,Anson County,24446,1.0
4,Ashe County,27203,0.0
...,...,...,...
95,Wayne County,123131,2.0
96,Wilkes County,68412,2.0
97,Wilson County,81801,3.0
98,Yadkin County,37667,1.0


In [54]:
nc_cases_pro = calculate_counties_poisson(NC_NewCases_norm_mean, sizeNC=(0, 500))
nc_death_pro = calculate_counties_poisson(NC_NewDeaths_norm_mean)

In [55]:
NC_NewCases_norm_mean_probablitity = NC_NewCases_norm_mean.merge(nc_cases_pro, right_index=True,
             left_index=True)
NC_NewDeaths_norm_mean_probablitity = NC_NewDeaths_norm_mean.merge(nc_death_pro, right_index=True,
             left_index=True)

In [77]:
NC_Cases_prob = NC_NewCases_norm_mean_probablitity

fig = go.Figure()
for i in range(len(NC_Cases_prob)):
    fig.add_trace(
        go.Scatter(x=NC_Cases_prob.index,
                   y=NC_Cases_prob.loc[i][3:],
                   fill='tozeroy',
                   name=NC_Cases_prob.loc[i]['County Name']))

fig.update_layout(
    title=
    "Poisson Distributions For North Carolina Counties COVID-19 in Cases",
    xaxis_title="Probability of occurring",
    yaxis_title="Number of Cases",
    legend_title="Counties",
)
fig.show()
fig.write_image("images/fig9.png")


In [78]:
NC_Deaths_prob = NC_NewDeaths_norm_mean_probablitity

fig = go.Figure()
for i in range(len(NC_Deaths_prob)):
    fig.add_trace(
        go.Scatter(x=NC_Deaths_prob.index,
                   y=NC_Deaths_prob.loc[i][4:],
                   fill='tozeroy',
                   name=NC_Deaths_prob.loc[i]['County Name']))

fig.update_layout(
    title=
    "Poisson Distributions For North Carolina Counties COVID-19 in Deaths",
    xaxis_title="Probability of occurring",
    yaxis_title="Number of Deaths",
    legend_title="Counties",
)
fig.show()
fig.write_image("images/fig10.png")


Perform corelation between Enrichment data valiables and COVID-19 cases to observe any patterns.

In [58]:
# Select the State that 
FL_New_Cases =US_New_Cases[US_New_Cases['State'] == 'FL']
FL_New_Cases =FL_New_Cases.fillna(0)
FL_New_Cases.reset_index(inplace=True)
FL_New_Cases.drop(['index'], axis=1, inplace=True)

FL_New_Deaths =US_New_Death[US_New_Death['State'] == 'FL']
FL_New_Deaths =FL_New_Deaths.fillna(0)
FL_New_Deaths.reset_index(inplace=True)
FL_New_Deaths.drop(['index'], axis=1, inplace=True)


In [59]:
FL_bed_counties =Bed[Bed['State'] == 'FL']
FL_bed_counties =FL_bed_counties.fillna(0)
FL_bed_counties.reset_index(inplace=True)

In [60]:
x = FL_New_Cases.groupby(['County Name'], as_index=False).sum()[FL_New_Cases.columns[3:]].sum(axis=1).to_frame()

y = FL_New_Deaths.groupby(['County Name'], as_index=False).sum()[FL_New_Deaths.columns[3:]].sum(axis=1).to_frame()

In [61]:
Death_NUM_ICU = y[0].corr(Bed['NUM_ICU_BEDS'])
Cases_NUM_ICU = x[0].corr(Bed['NUM_ICU_BEDS'])

Death_AVG_VENTILATOR_USAGE = y[0].corr(Bed['AVG_VENTILATOR_USAGE'])
Cases_AVG_VENTILATOR_USAGE = x[0].corr(Bed['AVG_VENTILATOR_USAGE'])

Death_NUM_STAFFED_BEDS = y[0].corr(Bed['NUM_STAFFED_BEDS'])
Cases_NUM_STAFFED_BEDS = x[0].corr(Bed['NUM_STAFFED_BEDS'])

Formulate hypothesis between Enrichment data and number of cases to be compared against states. Choose 3 different variables to compare against.
- For example: Does higher employment data lead to higher covid case numbers or more rapid increase in covid cases.


In [62]:
print(f"Corralation between new Death and number of ICU beds: {Death_NUM_ICU}")
print(f"Corralation between new Death and avg ventilator usage: {Death_AVG_VENTILATOR_USAGE}")
print(f"Corralation between new Death and number of staffed beds: {Death_NUM_STAFFED_BEDS}")
print()
print(f"Corralation between new Cases and number of ICU beds: {Cases_NUM_ICU}")
print(f"Corralation between new Cases and avg ventilator usage: {Cases_AVG_VENTILATOR_USAGE}")
print(f"Corralation between new Cases and number of staffed beds: {Cases_NUM_STAFFED_BEDS}")

Corralation between new Death and number of ICU beds: -0.019879705854529493
Corralation between new Death and avg ventilator usage: -0.04101837630256754
Corralation between new Death and number of staffed beds: -0.04454985654967586

Corralation between new Cases and number of ICU beds: 0.020085438865893222
Corralation between new Cases and avg ventilator usage: -0.016252742032337897
Corralation between new Cases and number of staffed beds: -0.01939198087606299


The Carrolation between Florida's Deaths and number of ICU beds is negative, meaning that as more the number of ICU beds increase the less deaths happen in the state. That also true in case of avg ventilator usuage and staffed beds. On the other hand, the corralation bwetween new cases and number of ICU beds is possitve. I think it's possitive because the more beds in the hospitals the more people who doesn't have covid visit their family and they get covid from other people who are in the other rooms. 