In [24]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import plotly.express as px
import plotly
import plotly.graph_objs as go

## Read Datasets

In [25]:
# Comparison countries (chosen for having a population similar to the US).
PopArray = ["Bangladesh","Indonesia","Pakistan","Brazil","Nigeria"]
country_columns = ["date"] + PopArray

# Read the super Covid-19 dataframe and drop the columns that are not needed.
USCovid = pd.read_csv("../data/output/covid.csv").drop(
    columns=["Unnamed: 0","countyFIPS","stateFIPS"]
)

# Read the per-country new-case / new-death tables, keeping only the date
# column and the comparison countries.
CountryNewCases = pd.read_csv("https://covid.ourworldindata.org/data/ecdc/new_cases.csv")[country_columns]
CountryNewDeaths = pd.read_csv("https://covid.ourworldindata.org/data/ecdc/new_deaths.csv")[country_columns]

# Read the locations table and keep only the rows of the comparison countries.
CountryPop = pd.read_csv("https://covid.ourworldindata.org/data/ecdc/locations.csv")
CountryPop = CountryPop[CountryPop["location"].isin(PopArray)]

# Drop the descriptive columns that are not used for normalization.
MyFivePop = CountryPop.drop(columns=["countriesAndTerritories","continent","population_year"])

## Sum up all United States counts, then derive new cases and new deaths normalized per 50 million people.

In [26]:
# Remove the "Statewide Unallocated" rows from the covid dataframe.
covid_df = USCovid[USCovid['County Name'] != "Statewide Unallocated"]

# Sum every numeric column over all remaining rows, giving a single
# 'UnitedStates' column with one total per original column.
# `np.number` replaces `pd.np.number`: the `pd.np` alias was deprecated in
# pandas 1.0 and removed in pandas 2.0.
sums = (
    covid_df.select_dtypes(include=np.number)
    .sum(axis=0)
    .rename('UnitedStates')
    .to_frame()
)


In [27]:
# Population values for the US and the comparison countries, used to
# normalize counts in later cells.

# First row of `sums` (a length-1 array).
# NOTE(review): presumably the summed population column — confirm against covid.csv.
USPopulation = sums.values[0]

# Look each population up by country name instead of by row position, so the
# values stay correct even if the row order of the locations table changes.
# Column 1 is the same column the positional `values[i, 1]` lookups read.
_population_by_country = dict(zip(MyFivePop["location"], MyFivePop.iloc[:, 1]))

Bang_Pop = _population_by_country["Bangladesh"]
Brazil_Pop = _population_by_country["Brazil"]
Indon_Pop = _population_by_country["Indonesia"]
Nigeria_Pop = _population_by_country["Nigeria"]
Pak_Pop = _population_by_country["Pakistan"]



In [28]:
# Separate cases and deaths by the suffix of the row label: labels ending in
# "_x" are cases and labels ending in "_y" are deaths (e.g. "10/8/20_x").
# The patterns are anchored so a label that merely contains the letter
# "x" or "y" somewhere else is not picked up by mistake.
US_cases = sums.filter(regex=r"_x$", axis=0)
US_deaths = sums.filter(regex=r"_y$", axis=0)

In [29]:
# Difference consecutive rows to get only the new cases / new deaths.
NewUScases, NewUSdeaths = US_cases.diff(), US_deaths.diff()

# Show the rows from position 260 onward for both tables.
print(NewUScases.iloc[260:], NewUSdeaths.iloc[260:], sep="\n")

            UnitedStates
10/8/20_x        56462.0
10/9/20_x        56731.0
10/10/20_x       54111.0
10/11/20_x       43207.0
10/12/20_x       42554.0
10/13/20_x       50399.0
10/14/20_x       59692.0
10/15/20_x       66735.0
10/16/20_x       63931.0
            UnitedStates
10/8/20_y          930.0
10/9/20_y          906.0
10/10/20_y         595.0
10/11/20_y         501.0
10/12/20_y         285.0
10/13/20_y         762.0
10/14/20_y         671.0
10/15/20_y        1172.0
10/16/20_y         851.0


In [30]:
# Normalize the daily counts to a rate per 50 million people.
norm_val = 50_000_000


def USNormalize(x):
    """Return ``x`` divided by ``USPopulation`` and scaled by ``norm_val``."""
    return (x / USPopulation) * norm_val


Norm_NewUScases = NewUScases.transform(USNormalize).round(0)
Norm_NewUSdeaths = NewUSdeaths.transform(USNormalize).round(0)
print(Norm_NewUScases, Norm_NewUSdeaths, sep="\n")

            UnitedStates
1/22/20_x            NaN
1/23/20_x            0.0
1/24/20_x            0.0
1/25/20_x            0.0
1/26/20_x            0.0
...                  ...
10/12/20_x        6482.0
10/13/20_x        7677.0
10/14/20_x        9093.0
10/15/20_x       10166.0
10/16/20_x        9738.0

[269 rows x 1 columns]
            UnitedStates
1/22/20_y            NaN
1/23/20_y            0.0
1/24/20_y            0.0
1/25/20_y            0.0
1/26/20_y            0.0
...                  ...
10/12/20_y          43.0
10/13/20_y         116.0
10/14/20_y         102.0
10/15/20_y         179.0
10/16/20_y         130.0

[269 rows x 1 columns]


## Get weekly average of cases

In [31]:
# Give every run of 7 consecutive daily rows the same week number, then
# average within each week and store the result as whole numbers.
_case_week = np.arange(len(Norm_NewUScases)) // 7
_death_week = np.arange(len(Norm_NewUSdeaths)) // 7

WeeklyUS_cases = (
    Norm_NewUScases.groupby(_case_week)
    .mean()
    .round(0)
    .astype(int)
    .rename_axis('Week')
)
WeeklyUS_deaths = (
    Norm_NewUSdeaths.groupby(_death_week)
    .mean()
    .round(0)
    .astype(int)
    .rename_axis('Week')
)

In [32]:
# Show the weekly US averages: cases first, then deaths.
print(WeeklyUS_cases, WeeklyUS_deaths, sep="\n")

      UnitedStates
Week              
0                0
1                0
2                0
3                0
4                0
5                1
6               20
7              112
8             1013
9             2791
10            4548
11            4525
12            4324
13            4421
14            4127
15            3565
16            3387
17            3288
18            3227
19            3190
20            3424
21            4462
22            6211
23            7669
24            9269
25           10073
26            9735
27            8843
28            8036
29            7216
30            6323
31            6344
32            5570
33            5789
34            6467
35            6280
36            6660
37            7690
38            9666
      UnitedStates
Week              
0                0
1                0
2                0
3                0
4                0
5                0
6                0
7                2
8               13
9           

## Find Mean, Median and Mode for all weeks of US. 

In [33]:
# Mean, median and mode of the weekly US figures.
# Each mode is kept as a nested list of the values of the frame that
# `mode()` returns.

# Cases
US_MeanCases = WeeklyUS_cases.mean().round(0)
US_MedianCases = WeeklyUS_cases.median()
US_ModeCases = WeeklyUS_cases.mode().values.tolist()

# Deaths
US_MeanDeaths = WeeklyUS_deaths.mean().round(0)
US_MedianDeaths = WeeklyUS_deaths.median()
US_ModeDeaths = WeeklyUS_deaths.mode().values.tolist()


## Calculate Country Mean, Median, and Mode weekly statistics to compare with U.S

In [34]:
# Restrict the country tables to the study window and fill NaN with zero.
# The window is selected by date rather than by row position ([22:290]) so
# that the same days are picked even if the remote files gain or lose rows.
# The `date` column holds ISO "YYYY-MM-DD" strings, which compare correctly
# as text; `between` is inclusive on both ends.
_window_start, _window_end = "2020-01-22", "2020-10-15"

_cases_in_window = CountryNewCases["date"].between(_window_start, _window_end)
_deaths_in_window = CountryNewDeaths["date"].between(_window_start, _window_end)

Country_NewCases = CountryNewCases[_cases_in_window].fillna(0)
Country_NewDeaths = CountryNewDeaths[_deaths_in_window].fillna(0)

print(Country_NewCases)
print(Country_NewDeaths)

           date  Bangladesh  Indonesia  Pakistan  Brazil  Nigeria
22   2020-01-22         0.0        0.0       0.0       0      0.0
23   2020-01-23         0.0        0.0       0.0       0      0.0
24   2020-01-24         0.0        0.0       0.0       0      0.0
25   2020-01-25         0.0        0.0       0.0       0      0.0
26   2020-01-26         0.0        0.0       0.0       0      0.0
..          ...         ...        ...       ...     ...      ...
285  2020-10-11      1203.0     4294.0     666.0   26749    111.0
286  2020-10-12      1193.0     4497.0       0.0   12345    163.0
287  2020-10-13      1472.0     3267.0     916.0    8426    164.0
288  2020-10-14      1537.0     3906.0     615.0   10220    225.0
289  2020-10-15      1684.0     4127.0     755.0   27235    179.0

[268 rows x 6 columns]
           date  Bangladesh  Indonesia  Pakistan  Brazil  Nigeria
22   2020-01-22         0.0        0.0       0.0       0      0.0
23   2020-01-23         0.0        0.0       0.0    

## Normalize cases and deaths per 50 million population.

In [35]:
# Normalize each country's daily counts to a rate per `norm_val`
# (50 million) people, using that country's own population.
def _normalize_by_population(frame, populations, scale):
    """Return a copy of ``frame`` with each country column population-normalized.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with one column per country in ``populations``.
    populations : dict
        Maps a country column name to that country's population.
    scale : number
        Reference population size the counts are scaled to.

    Returns
    -------
    pandas.DataFrame
        Copy of ``frame`` where every listed column holds
        ``(count / population) * scale``, with numeric columns rounded to
        zero decimals. Columns not listed in ``populations`` keep their values.
    """
    normalized = frame.copy()
    for country, population in populations.items():
        normalized[country] = (normalized[country] / population) * scale
    return normalized.round(0)


_country_populations = {
    "Bangladesh": Bang_Pop,
    "Indonesia": Indon_Pop,
    "Pakistan": Pak_Pop,
    "Brazil": Brazil_Pop,
    "Nigeria": Nigeria_Pop,
}

Norm_CountryCases = _normalize_by_population(Country_NewCases, _country_populations, norm_val)
Norm_Country_NewDeaths = _normalize_by_population(Country_NewDeaths, _country_populations, norm_val)

print(Norm_CountryCases)
print(Norm_Country_NewDeaths)


           date  Bangladesh  Indonesia  Pakistan  Brazil  Nigeria
22   2020-01-22         0.0        0.0       0.0     0.0      0.0
23   2020-01-23         0.0        0.0       0.0     0.0      0.0
24   2020-01-24         0.0        0.0       0.0     0.0      0.0
25   2020-01-25         0.0        0.0       0.0     0.0      0.0
26   2020-01-26         0.0        0.0       0.0     0.0      0.0
..          ...         ...        ...       ...     ...      ...
285  2020-10-11       365.0      785.0     151.0  6292.0     27.0
286  2020-10-12       362.0      822.0       0.0  2904.0     40.0
287  2020-10-13       447.0      597.0     207.0  1982.0     40.0
288  2020-10-14       467.0      714.0     139.0  2404.0     55.0
289  2020-10-15       511.0      754.0     171.0  6406.0     43.0

[268 rows x 6 columns]
           date  Bangladesh  Indonesia  Pakistan  Brazil  Nigeria
22   2020-01-22         0.0        0.0       0.0     0.0      0.0
23   2020-01-23         0.0        0.0       0.0    

## Find average of Cases and deaths for each week. 

In [36]:
# Calculate the weekly average of normalized cases and deaths.
# Every run of 7 consecutive daily rows forms one week. The week numbers are
# built from the length of the frame actually being grouped.
# `numeric_only=True` skips the string `date` column explicitly; without it
# pandas >= 2.0 raises instead of silently dropping that column.
WeeklyCountries_cases = (
    Norm_CountryCases
    .groupby(np.arange(len(Norm_CountryCases)) // 7)
    .mean(numeric_only=True)
    .round(0)
    .astype(int)
    .rename_axis('Week')
)
WeeklyCountries_deaths = (
    Norm_Country_NewDeaths
    .groupby(np.arange(len(Norm_Country_NewDeaths)) // 7)
    .mean(numeric_only=True)
    .round(0)
    .astype(int)
    .rename_axis('Week')
)

print(WeeklyCountries_cases)
print(WeeklyCountries_deaths)

      Bangladesh  Indonesia  Pakistan  Brazil  Nigeria
Week                                                  
0              0          0         0       0        0
1              0          0         0       0        0
2              0          0         0       0        0
3              0          0         0       0        0
4              0          0         0       0        0
5              0          0         0       0        0
6              0          0         0       1        0
7              0          3         5       7        0
8              1         12        23      56        1
9              1         22        24      90        3
10             3         28        72     251        4
11            29         54        60     382        4
12            93         57       113     576       11
13           129         61       157     871       23
14           184         65       240    1387       51
15           241         70       342    2035       64
16        

## Calculate Mean, Median, and Mode of cases and deaths for all 5 countries. 

In [37]:
# Mean, median and mode of the weekly figures, one value per country column.
# Each mode is kept as a nested list of the values of the frame that
# `mode()` returns.

# Cases
FiveCountry_MeanCases = WeeklyCountries_cases.mean().round(0)
FiveCountry_MedianCases = WeeklyCountries_cases.median()
FiveCountry_ModeCases = WeeklyCountries_cases.mode().values.tolist()

# Deaths
FiveCountry_MeanDeaths = WeeklyCountries_deaths.mean().round(0)
FiveCountry_MedianDeaths = WeeklyCountries_deaths.median()
FiveCountry_ModeDeaths = WeeklyCountries_deaths.mode().values.tolist()


## Put the mean, median and mode values into lists, to be loaded into dataframes for easier comparison.

In [38]:
def _summary_row(label, means, medians, modes, position):
    """Build one ``[label, mean, median, mode]`` row.

    ``means`` and ``medians`` are Series read by position with ``.iloc``.
    Plain ``series[0]`` on a string-labelled Series relies on a deprecated
    positional fallback. ``modes`` is a nested list whose first row is used.
    """
    return [label, means.iloc[position], medians.iloc[position], modes[0][position]]


# Country names in the positional order of the weekly country columns.
_summary_countries = ["Bangladesh", "Indonesia", "Pakistan", "Brazil", "Nigeria"]

# One row for the United States followed by one row per comparison country.
ListCase = [_summary_row("United States", US_MeanCases, US_MedianCases, US_ModeCases, 0)] + [
    _summary_row(country, FiveCountry_MeanCases, FiveCountry_MedianCases, FiveCountry_ModeCases, position)
    for position, country in enumerate(_summary_countries)
]

ListDeaths = [_summary_row("United States", US_MeanDeaths, US_MedianDeaths, US_ModeDeaths, 0)] + [
    _summary_row(country, FiveCountry_MeanDeaths, FiveCountry_MedianDeaths, FiveCountry_ModeDeaths, position)
    for position, country in enumerate(_summary_countries)
]

## Weekly Statistics Comparison of Cases

In [39]:
# Tabulate the per-country case statistics, one row per country.
_case_stat_columns = ['Country','Mean', 'Median', 'Mode']
CountryWeeklyStatisticsCase = pd.DataFrame(data=ListCase, columns=_case_stat_columns)

CountryWeeklyStatisticsCase


Unnamed: 0,Country,Mean,Median,Mode
0,United States,4571.0,4462.0,0
1,Bangladesh,435.0,431.0,0
2,Indonesia,244.0,133.0,0
3,Pakistan,269.0,144.0,0
4,Brazil,4510.0,5093.0,0
5,Nigeria,55.0,43.0,0


## Weekly Statistics Comparison of Deaths

In [40]:
# Tabulate the per-country death statistics, one row per country.
_death_stat_columns = ['Country','Mean', 'Median', 'Mode']
CountryWeeklyStatisticsDeath = pd.DataFrame(data=ListDeaths, columns=_death_stat_columns)

CountryWeeklyStatisticsDeath

Unnamed: 0,Country,Mean,Median,Mode
0,United States,123.0,118.0,0
1,Bangladesh,6.0,8.0,0
2,Indonesia,8.0,6.0,0
3,Pakistan,5.0,2.0,0
4,Brazil,133.0,161.0,0
5,Nigeria,1.0,1.0,0


In [41]:
# Convert the row labels from "M/D/YY_x" text to real dates.
# The index is rewritten directly instead of transposing twice and renaming
# the columns. The guard makes the cell safe to re-run: once the index is a
# DatetimeIndex there is nothing left to convert.
# Unlike the deprecated errors='ignore', a label that does not parse now
# raises instead of being kept silently as text.
if not isinstance(Norm_NewUScases.index, pd.DatetimeIndex):
    Norm_NewUScases = Norm_NewUScases.copy()
    Norm_NewUScases.index = pd.to_datetime(
        Norm_NewUScases.index.str.split("_").str[0], format="%m/%d/%y"
    )
Norm_NewUScases.head()

Unnamed: 0,UnitedStates
2020-01-22,
2020-01-23,0.0
2020-01-24,0.0
2020-01-25,0.0
2020-01-26,0.0


# United States  

In [42]:

# Daily normalized new cases: the US series is indexed by date, while the
# comparison countries share a `date` column.
_plot_countries = ["Bangladesh", "Indonesia", "Pakistan", "Brazil", "Nigeria"]

# A plain list of traces replaces the deprecated `go.Data` wrapper.
mydata = [
    go.Scatter(x=Norm_NewUScases.index, y=Norm_NewUScases["UnitedStates"],
               name="US",
               mode="lines+markers")
]
mydata += [
    go.Scatter(x=Norm_CountryCases['date'], y=Norm_CountryCases[country],
               name=country,
               mode="lines+markers")
    for country in _plot_countries
]

# Keep the individual trace names bound, as before.
trace0, trace1, trace2, trace3, trace4, trace5 = mydata

mylayout = go.Layout(
    title="Cases",
    xaxis=dict(title="Date"),
    yaxis=dict(title="New cases (population-normalized)"),
)

fig = go.Figure(data=mydata, layout=mylayout)

plotly.offline.iplot(fig, filename = '')


In [43]:

# Weekly averages of normalized new cases, plotted against the week number.
_plot_countries = ["Bangladesh", "Indonesia", "Pakistan", "Brazil", "Nigeria"]

# A plain list of traces replaces the deprecated `go.Data` wrapper.
mydata = [
    go.Scatter(x=WeeklyUS_cases.index, y=WeeklyUS_cases["UnitedStates"],
               name="US",
               mode="lines+markers")
]
mydata += [
    go.Scatter(x=WeeklyCountries_cases.index, y=WeeklyCountries_cases[country],
               name=country,
               mode="lines+markers")
    for country in _plot_countries
]

# Keep the individual trace names bound, as before.
trace0, trace1, trace2, trace3, trace4, trace5 = mydata

mylayout = go.Layout(
    title="Weekly Cases",
    xaxis=dict(title="Week"),
    yaxis=dict(title="Weekly mean of new cases (population-normalized)"),
)

fig = go.Figure(data=mydata, layout=mylayout)

plotly.offline.iplot(fig, filename = '')

In [44]:
# Convert the row labels from "M/D/YY_y" text to real dates.
# The index is rewritten directly instead of transposing twice and renaming
# the columns. The guard makes the cell safe to re-run: once the index is a
# DatetimeIndex there is nothing left to convert.
# Unlike the deprecated errors='ignore', a label that does not parse now
# raises instead of being kept silently as text.
if not isinstance(Norm_NewUSdeaths.index, pd.DatetimeIndex):
    Norm_NewUSdeaths = Norm_NewUSdeaths.copy()
    Norm_NewUSdeaths.index = pd.to_datetime(
        Norm_NewUSdeaths.index.str.split("_").str[0], format="%m/%d/%y"
    )
Norm_NewUSdeaths.head()

Unnamed: 0,UnitedStates
2020-01-22,
2020-01-23,0.0
2020-01-24,0.0
2020-01-25,0.0
2020-01-26,0.0


In [45]:
# Daily normalized new deaths: the US series is indexed by date, while the
# comparison countries share a `date` column.
_plot_countries = ["Bangladesh", "Indonesia", "Pakistan", "Brazil", "Nigeria"]

# A plain list of traces replaces the deprecated `go.Data` wrapper.
mydata = [
    go.Scatter(x=Norm_NewUSdeaths.index, y=Norm_NewUSdeaths["UnitedStates"],
               name="US",
               mode="lines+markers")
]
mydata += [
    go.Scatter(x=Norm_Country_NewDeaths['date'], y=Norm_Country_NewDeaths[country],
               name=country,
               mode="lines+markers")
    for country in _plot_countries
]

# Keep the individual trace names bound, as before.
trace0, trace1, trace2, trace3, trace4, trace5 = mydata

mylayout = go.Layout(
    title="Deaths",
    xaxis=dict(title="Date"),
    yaxis=dict(title="New deaths (population-normalized)"),
)

fig = go.Figure(data=mydata, layout=mylayout)

plotly.offline.iplot(fig, filename = '')


In [46]:

# Weekly averages of normalized new deaths, plotted against the week number.
_plot_countries = ["Bangladesh", "Indonesia", "Pakistan", "Brazil", "Nigeria"]

# A plain list of traces replaces the deprecated `go.Data` wrapper.
mydata = [
    go.Scatter(x=WeeklyUS_deaths.index, y=WeeklyUS_deaths["UnitedStates"],
               name="US",
               mode="lines+markers")
]
mydata += [
    go.Scatter(x=WeeklyCountries_deaths.index, y=WeeklyCountries_deaths[country],
               name=country,
               mode="lines+markers")
    for country in _plot_countries
]

# Keep the individual trace names bound, as before.
trace0, trace1, trace2, trace3, trace4, trace5 = mydata

mylayout = go.Layout(
    title="Weekly Deaths",
    xaxis=dict(title="Week"),
    yaxis=dict(title="Weekly mean of new deaths (population-normalized)"),
)

fig = go.Figure(data=mydata, layout=mylayout)

plotly.offline.iplot(fig, filename = '')

# - The week of July 16 2020 was the peak week for the US
