In [241]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt

## Read Datasets

In [242]:
# Load the combined ("super") US COVID-19 dataframe.
USCovid = pd.read_csv("../data/output/covid.csv")

# Countries compared against the US (chosen for having a similar population).
PopArray = ["Bangladesh","Indonesia","Pakistan","Brazil","Nigeria"]

# Per-country new-case and new-death tables, plus the locations table.
CountryNewCases = pd.read_csv("https://covid.ourworldindata.org/data/ecdc/new_cases.csv")
CountryNewDeaths = pd.read_csv("https://covid.ourworldindata.org/data/ecdc/new_deaths.csv")
CountryPop = pd.read_csv("https://covid.ourworldindata.org/data/ecdc/locations.csv")

# Keep only the date column and the comparison countries.
_kept_columns = ["date"] + PopArray
CountryNewCases = CountryNewCases[_kept_columns]
CountryNewDeaths = CountryNewDeaths[_kept_columns]

# Restrict the locations table to the same countries, then discard the
# columns that are not used later.
CountryPop = CountryPop.loc[CountryPop["location"].isin(PopArray)]
MyFivePop = CountryPop.drop(columns=["countriesAndTerritories","continent","population_year"])

# Drop unnecessary variables from the super COVID-19 dataframe.
USCovid = USCovid.drop(columns=["Unnamed: 0","countyFIPS","stateFIPS"])

## Sum up United States Cases together and get New cases and New deaths Normalized by cases per 1 million people. 

In [243]:
# Exclude the "Statewide Unallocated" rows of the US dataframe.
covid_df = USCovid.loc[USCovid['County Name'] != "Statewide Unallocated"]

# Collapse every numeric column to a single total, giving one value per
# column, and expose it as a one-column frame named 'UnitedStates'.
# `np.number` is used directly: the `pd.np` alias was deprecated in pandas 1.0
# and removed in pandas 2.0.
sums = (
    covid_df
    .select_dtypes(include=np.number)
    .sum(axis=0)
    .rename('UnitedStates')
    .to_frame()
)


In [244]:
# Population values used to normalize the counts in later cells.

# US population: the first row of `sums`.
# NOTE(review): presumably the population total -- confirm the column order of
# the US dataframe.
USPopulation = sums.values[0]

# Country populations are looked up by country name rather than by row
# position, so the assignments stay correct even if the row order of the
# locations table changes. The value column is the same one the positional
# lookup used (second column of MyFivePop; presumably the population).
_pop_column = MyFivePop.columns[1]
_pop_by_country = MyFivePop.set_index("location")[_pop_column]

Bang_Pop = _pop_by_country["Bangladesh"]
Brazil_Pop = _pop_by_country["Brazil"]
Indon_Pop = _pop_by_country["Indonesia"]
Nigeria_Pop = _pop_by_country["Nigeria"]
Pak_Pop = _pop_by_country["Pakistan"]



In [245]:
# Separate cases and deaths by the suffix of the row label: "<date>_x" rows
# hold cases and "<date>_y" rows hold deaths. The patterns are anchored to the
# end of the label so that an unrelated row whose name merely contains the
# letter "x" or "y" can never be picked up.
US_cases = sums.filter(regex = r"_x$", axis = 0)
US_deaths = sums.filter(regex = r"_y$", axis = 0)

In [246]:
# Row-to-row differences give the new cases / new deaths per date.
# The first row has no predecessor and therefore becomes NaN.
NewUScases = US_cases.diff()
NewUSdeaths = US_deaths.diff()

# Show the rows from position 260 onward for both frames.
for _daily in (NewUScases, NewUSdeaths):
    print(_daily.iloc[260:])

            UnitedStates
10/8/20_x        56462.0
10/9/20_x        56731.0
10/10/20_x       54111.0
10/11/20_x       43207.0
10/12/20_x       42554.0
10/13/20_x       50399.0
10/14/20_x       59692.0
10/15/20_x       66735.0
10/16/20_x       63931.0
            UnitedStates
10/8/20_y          930.0
10/9/20_y          906.0
10/10/20_y         595.0
10/11/20_y         501.0
10/12/20_y         285.0
10/13/20_y         762.0
10/14/20_y         671.0
10/15/20_y        1172.0
10/16/20_y         851.0


In [254]:
# Normalize the daily US counts by the US population.
# NOTE(review): the notebook text describes this as "per 1 million people",
# but the factor applied is 20,000,000. The factor is left unchanged here
# because the country normalization uses the same value -- confirm which is
# intended and update both places together.
def USNormalize(x):
    """Divide by the US population and multiply by the fixed 20,000,000 factor."""
    return (x / USPopulation) * 20000000


Norm_NewUScases = NewUScases.transform(USNormalize)
Norm_NewUSdeaths = NewUSdeaths.transform(USNormalize)

for _normalized in (Norm_NewUScases, Norm_NewUSdeaths):
    print(_normalized)

            UnitedStates
1/22/20_x            NaN
1/23/20_x       0.000000
1/24/20_x       0.060931
1/25/20_x       0.000000
1/26/20_x       0.182793
...                  ...
10/12/20_x   2592.862652
10/13/20_x   3070.867246
10/14/20_x   3637.100094
10/15/20_x   4066.237934
10/16/20_x   3895.387089

[269 rows x 1 columns]
            UnitedStates
1/22/20_y            NaN
1/23/20_y       0.000000
1/24/20_y       0.000000
1/25/20_y       0.000000
1/26/20_y       0.000000
...                  ...
10/12/20_y     17.365368
10/13/20_y     46.429509
10/14/20_y     40.884778
10/15/20_y     71.411266
10/16/20_y     51.852379

[269 rows x 1 columns]


## Get weekly average of cases

In [255]:
def _weekly_mean(daily):
    """Average consecutive 7-row chunks of `daily`, rounded to whole integers.

    Rows are grouped purely by position (row number // 7), and the resulting
    index is named 'Week'.
    """
    week_ids = np.arange(len(daily)) // 7
    averaged = daily.groupby(week_ids).mean()
    return averaged.round(0).astype(int).rename_axis('Week')


WeeklyUS_cases = _weekly_mean(Norm_NewUScases)
WeeklyUS_deaths = _weekly_mean(Norm_NewUSdeaths)

In [256]:
# Print the weekly US averages: cases first, then deaths.
for _weekly in (WeeklyUS_cases, WeeklyUS_deaths):
    print(_weekly)

      UnitedStates
Week              
0                0
1                0
2                0
3                0
4                0
5                1
6                8
7               45
8              405
9             1117
10            1819
11            1810
12            1730
13            1768
14            1651
15            1426
16            1355
17            1315
18            1291
19            1276
20            1370
21            1785
22            2485
23            3068
24            3708
25            4029
26            3894
27            3537
28            3214
29            2887
30            2529
31            2537
32            2228
33            2315
34            2587
35            2512
36            2664
37            3076
38            3866
      UnitedStates
Week              
0                0
1                0
2                0
3                0
4                0
5                0
6                0
7                1
8                5
9           

## Find Mean, Median and Mode for all weeks of US. 

In [257]:
# Mean, median and mode of the weekly US values.
# The mean is rounded to 2 decimals; the mode is stored as a nested list
# (one inner list per mode row) rather than as a DataFrame.

# Cases
US_MeanCases = WeeklyUS_cases.mean().round(2)
US_MedianCases = WeeklyUS_cases.median()
US_ModeCases = WeeklyUS_cases.mode().values.tolist()

# Deaths
US_MeanDeaths = WeeklyUS_deaths.mean().round(2)
US_MedianDeaths = WeeklyUS_deaths.median()
US_ModeDeaths = WeeklyUS_deaths.mode().values.tolist()


## Calculate country mean, median, and mode weekly statistics to compare with the U.S.

In [258]:
# Restrict the country tables to the study window and fill NaN with zero.
# The window is selected by date rather than by hard-coded row positions
# (previously rows 22..289), so it keeps selecting the same days even if rows
# are added to or removed from the start of the source files.
# NOTE(review): assumes `date` holds ISO "YYYY-MM-DD" values, as in the printed
# output -- confirm. The window ends on 2020-10-15, matching the old slice.
_STUDY_START = "2020-01-22"
_STUDY_END = "2020-10-15"


def _study_window(frame):
    """Return the rows of `frame` dated within the study window, NaN replaced by 0."""
    in_window = frame["date"].between(_STUDY_START, _STUDY_END)  # inclusive on both ends
    return frame.loc[in_window].fillna(0)


Country_NewCases = _study_window(CountryNewCases)
Country_NewDeaths = _study_window(CountryNewDeaths)

print(Country_NewCases)
print(Country_NewDeaths)

           date  Bangladesh  Indonesia  Pakistan  Brazil  Nigeria
22   2020-01-22         0.0        0.0       0.0       0      0.0
23   2020-01-23         0.0        0.0       0.0       0      0.0
24   2020-01-24         0.0        0.0       0.0       0      0.0
25   2020-01-25         0.0        0.0       0.0       0      0.0
26   2020-01-26         0.0        0.0       0.0       0      0.0
..          ...         ...        ...       ...     ...      ...
285  2020-10-11      1203.0     4294.0     666.0   26749    111.0
286  2020-10-12      1193.0     4497.0       0.0   12345    163.0
287  2020-10-13      1472.0     3267.0     916.0    8426    164.0
288  2020-10-14      1537.0     3906.0     615.0   10220    225.0
289  2020-10-15      1684.0     4127.0     755.0   27235    179.0

[268 rows x 6 columns]
           date  Bangladesh  Indonesia  Pakistan  Brazil  Nigeria
22   2020-01-22         0.0        0.0       0.0       0      0.0
23   2020-01-23         0.0        0.0       0.0    

## Normalize Cases and Death by 1 million population. 

In [259]:
# Normalize each country's daily counts by that country's population.
# NOTE(review): the notebook text describes this as "per 1 million people",
# but the factor applied is 20,000,000. It is kept unchanged so results stay
# comparable with the US normalization, which uses the same factor -- confirm
# which is intended and update both places together.
_NORMALIZATION_FACTOR = 20000000

# Population used as the divisor for each country column.
_population_of = {
    "Bangladesh": Bang_Pop,
    "Indonesia": Indon_Pop,
    "Pakistan": Pak_Pop,
    "Brazil": Brazil_Pop,
    "Nigeria": Nigeria_Pop,
}


def _normalize_by_population(daily, populations, factor):
    """Return a copy of `daily` with each country column rescaled.

    Each column named in `populations` is replaced by
    (column / population) * factor; all other columns (e.g. `date`) and the
    column order are left untouched.
    """
    rescaled = {
        country: (daily[country] / population) * factor
        for country, population in populations.items()
    }
    return daily.assign(**rescaled)


Norm_CountryCases = _normalize_by_population(
    Country_NewCases, _population_of, _NORMALIZATION_FACTOR
)
Norm_Country_NewDeaths = _normalize_by_population(
    Country_NewDeaths, _population_of, _NORMALIZATION_FACTOR
)

print(Norm_CountryCases)
print(Norm_Country_NewDeaths)


           date  Bangladesh   Indonesia   Pakistan       Brazil    Nigeria
22   2020-01-22    0.000000    0.000000   0.000000     0.000000   0.000000
23   2020-01-23    0.000000    0.000000   0.000000     0.000000   0.000000
24   2020-01-24    0.000000    0.000000   0.000000     0.000000   0.000000
25   2020-01-25    0.000000    0.000000   0.000000     0.000000   0.000000
26   2020-01-26    0.000000    0.000000   0.000000     0.000000   0.000000
..          ...         ...         ...        ...          ...        ...
285  2020-10-11  146.093206  313.976540  60.300871  2516.849301  10.769402
286  2020-10-12  144.878799  328.819865   0.000000  1161.557614  15.814527
287  2020-10-13  178.760764  238.882477  82.936333   792.813646  15.911548
288  2020-10-14  186.654412  285.606046  55.683237   961.613513  21.829868
289  2020-10-15  204.506201  301.765528  68.359096  2562.577693  17.366873

[268 rows x 6 columns]
           date  Bangladesh  Indonesia  Pakistan     Brazil   Nigeria
22   2

## Find average of Cases and deaths for each week. 

In [260]:
# Weekly average of the normalized country cases and deaths.
def _weekly_country_mean(daily):
    """Average consecutive 7-row chunks of `daily`, rounded to whole integers.

    Rows are grouped by position (row number // 7) using the length of the
    frame actually being grouped. `numeric_only=True` excludes the string
    `date` column explicitly; relying on it being dropped silently raises a
    TypeError on pandas >= 2.0.
    """
    week_ids = np.arange(len(daily)) // 7
    averaged = daily.groupby(week_ids).mean(numeric_only=True)
    return averaged.round(0).astype(int).rename_axis('Week')


WeeklyCountries_cases = _weekly_country_mean(Norm_CountryCases)
WeeklyCountries_deaths = _weekly_country_mean(Norm_Country_NewDeaths)

print(WeeklyCountries_cases)
print(WeeklyCountries_deaths)

      Bangladesh  Indonesia  Pakistan  Brazil  Nigeria
Week                                                  
0              0          0         0       0        0
1              0          0         0       0        0
2              0          0         0       0        0
3              0          0         0       0        0
4              0          0         0       0        0
5              0          0         0       0        0
6              0          0         0       0        0
7              0          1         2       3        0
8              0          5         9      22        1
9              0          9        10      36        1
10             1         11        29     101        1
11            12         22        24     153        1
12            37         23        45     231        4
13            51         24        63     348        9
14            73         26        96     555       20
15            96         28       137     814       25
16        

## Calculate Mean, Median, and Mode of cases and deaths for all 5 countries. 

In [261]:
# Mean, median and mode of the weekly values for each of the five countries.
# The mean is rounded to 2 decimals; the mode is stored as a nested list
# (one inner list per mode row, one entry per country column).

# Cases
FiveCountry_MeanCases = WeeklyCountries_cases.mean().round(2)
FiveCountry_MedianCases = WeeklyCountries_cases.median()
FiveCountry_ModeCases = WeeklyCountries_cases.mode().values.tolist()

# Deaths
FiveCountry_MeanDeaths = WeeklyCountries_deaths.mean().round(2)
FiveCountry_MedianDeaths = WeeklyCountries_deaths.median()
FiveCountry_ModeDeaths = WeeklyCountries_deaths.mode().values.tolist()


## Put the mean, median, and mode values into lists that feed a DataFrame for easier comparison

In [262]:
def _summary_rows(labels, means, medians, mode_rows):
    """Build one [label, mean, median, mode] row per entry of `labels`.

    `means` and `medians` are Series read by position via `.iloc`: integer keys
    passed to `Series[...]` on a string-labelled Series rely on a deprecated
    positional fallback. `mode_rows` is the nested list produced from
    `.mode().values.tolist()`; only its first row is used.
    """
    first_mode_row = mode_rows[0]
    return [
        [label, means.iloc[pos], medians.iloc[pos], first_mode_row[pos]]
        for pos, label in enumerate(labels)
    ]


# Display labels, in the same order as the columns of the country statistics.
_us_label = ["United States"]
_country_labels = ["Bangladesh", "Indonesia", "Pakistan", "Brazil", "Nigeria"]

ListCase = (
    _summary_rows(_us_label, US_MeanCases, US_MedianCases, US_ModeCases)
    + _summary_rows(_country_labels, FiveCountry_MeanCases,
                    FiveCountry_MedianCases, FiveCountry_ModeCases)
)

ListDeaths = (
    _summary_rows(_us_label, US_MeanDeaths, US_MedianDeaths, US_ModeDeaths)
    + _summary_rows(_country_labels, FiveCountry_MeanDeaths,
                    FiveCountry_MedianDeaths, FiveCountry_ModeDeaths)
)

## Weekly Statistics Comparison of Cases

In [263]:
# Tabulate the per-country case statistics (one row per country) and display them.
_case_stat_columns = ['Country','Mean', 'Median', 'Mode']
CountryWeeklyStatisticsCase = pd.DataFrame(data=ListCase, columns=_case_stat_columns)

CountryWeeklyStatisticsCase


Unnamed: 0,Country,Mean,Median,Mode
0,United States,1828.41,1785.0,0
1,Bangladesh,173.87,172.0,0
2,Indonesia,97.72,53.0,0
3,Pakistan,107.69,58.0,0
4,Brazil,1804.08,2037.0,0
5,Nigeria,21.9,17.0,0


## Weekly Statistics Comparison of Deaths

In [264]:
# Tabulate the per-country death statistics (one row per country) and display them.
_death_stat_columns = ['Country','Mean', 'Median', 'Mode']
CountryWeeklyStatisticsDeath = pd.DataFrame(data=ListDeaths, columns=_death_stat_columns)

CountryWeeklyStatisticsDeath

Unnamed: 0,Country,Mean,Median,Mode
0,United States,49.0,47.0,0
1,Bangladesh,2.49,3.0,0
2,Indonesia,3.38,3.0,0
3,Pakistan,2.21,1.0,1
4,Brazil,53.21,64.0,0
5,Nigeria,0.38,0.0,0


# United States  