In [220]:
import numpy as np
import pandas as pd

## COVID - 19 Data From Jhon Hopkins University  

Data Link -> [GitHub](https://github.com/CSSEGISandData/COVID-19)

In [221]:
total_case_link = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
deaths_link = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
recovered_link = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
consise_link = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv"

## Defining Some Utility Function to Sort and Clean Data

In [222]:
def dropoff(dfs, data_left):
    for i in range(0, len(data_left)):
        ele = data_left[i]
        idx = dfs[dfs.country == ele].index
        dfs.drop(idx, axis = 0, inplace = True)
    
    return dfs

In [223]:
def order_data(dframes):
    dframe = pd.DataFrame(dframes)
    dframe.drop(['Lat', 'Long'], axis = 1, inplace = True)
    dframe.rename(columns={'Province/State':'province','Country/Region':'country'}, inplace = True)
    
    #----------------------------------------------------------
    country_list = np.sort(np.array(dframe.country.unique()))
    repeated = {}
    rep_country = []
    rep_first_dict = {}
    
    for i in range(0, len(country_list)):
        idx = np.array(dframe[dframe.country == country_list[i]].index)
        
        if len(idx) > 1:
            repeated[country_list[i]] = idx
            rep_country.append(country_list[i])
            rep_first_dict[idx[0]] = country_list[i]
        
        else:
            continue
    
    #---------------------------------------------------------
    to_drop = []
    to_change = []
    
    for i in range(0, len(rep_country)):
        nation = rep_country[i]
        index = repeated[nation]
        first_index = index[0]
        to_change.append(first_index)
        
        last_index = index[1:]
        for j in last_index:
            to_drop.append(j)
        
        dframe.iloc[first_index] = np.sum(dframe.iloc[index], axis = 0)
    
    
    #----------------------------------------------------------
    for i in to_change:
        dframe.loc[i, 'country'] = rep_first_dict[i]
    
    
    #----------------------------------------------------------
    dframe.drop(to_drop, inplace = True)
    
    
    #----------------------------------------------------------
    to_drop = ['Diamond Princess', 'Kosovo', 'MS Zaandam']

    new_frame = dropoff(dframe, to_drop)
    new_frame = dframe.drop(columns=['province'])
    new_frame.set_index('country', inplace = True)
    
    #--------------------
    return new_frame

In [224]:
def clean_data(dframe, dframe1, dframe2):
    cno = dframe.shape[0]
    dno = dframe.shape[1]
    entrys = cno*dno
    date = np.array(dframe.columns)
    country_list = np.array(dframe.index)
    
    nations = []
    dates = []
    values = []
    deaths = []
    recovery = []
    
    for i in range(0, cno):
        for j in range(0, dno):
            nations.append(country_list[i])
            dates.append(date[j])
            val = dframe.get_value(country_list[i], date[j])
            values.append(val)
    
    for i in range(0, cno):
        for j in range(0, dno):
            val1 = dframe1.get_value(country_list[i], date[j])
            recovery.append(val1)
    
    for i in range(0, cno):
        for j in range(0, dno):
            val2 = dframe2.get_value(country_list[i], date[j])
            deaths.append(val2)
    
    values = np.array(values).astype(np.int32)
    recovery = np.array(recovery).astype(np.int32)
    deaths = np.array(deaths).astype(np.int32)
    res = {'country':nations, 'date':dates, 'total_cases':values, 'recovered_cases':recovery, 'total_deaths':deaths}
    rec = pd.DataFrame(res)
    rec['date'] = pd.to_datetime(rec['date'])
    rec.fillna(0)
    
    return rec

In [225]:
def get_timeSeries_data(total_case_link, recovered_link, deaths_link):
    
    total = pd.read_csv(total_case_link)
    recovered = pd.read_csv(recovered_link)
    deaths = pd.read_csv(deaths_link)

    dframe = order_data(total)
    dframe1 = order_data(recovered)
    dframe2 = order_data(deaths)

    df = clean_data(dframe, dframe1, dframe2)

    to_drop = ['Diamond Princess', 'Kosovo', 'MS Zaandam']

    df_final = dropoff(df, to_drop)

    return df_final

In [226]:
def get_cumulativeCountry_data(consise_link):

    datac = pd.read_csv(consise_link)
    data = pd.DataFrame(datac)

    data = data.drop(['People_Tested', 'People_Hospitalized'], axis =1)
    data['Last_Update'] = pd.to_datetime(data['Last_Update']) 
    data = data.rename(columns = {'Country_Region':'country'})

    to_drop_from_data = ['Diamond Princess', 'Kosovo', 'MS Zaandam']

    cumulative = dropoff(data, to_drop_from_data)
    
    cumulative.set_index('country', inplace = True)

    return cumulative

## Downloading time series data into a DataFrame

In [227]:
df = get_timeSeries_data(total_case_link, recovered_link, deaths_link)



In [228]:
df

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths
0,Afghanistan,2020-01-22,0,0,0
1,Afghanistan,2020-01-23,0,0,0
2,Afghanistan,2020-01-24,0,0,0
3,Afghanistan,2020-01-25,0,0,0
4,Afghanistan,2020-01-26,0,0,0
...,...,...,...,...,...
31260,Lesotho,2020-07-04,35,11,0
31261,Lesotho,2020-07-05,79,11,0
31262,Lesotho,2020-07-06,91,11,0
31263,Lesotho,2020-07-07,91,11,0


## Downloading Cumulative COVID 19 country data into a DataFrame

In [229]:
cumulative = get_cumulativeCountry_data(consise_link)

In [230]:
cumulative

Unnamed: 0_level_0,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Incident_Rate,Mortality_Rate,UID,ISO3
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Australia,2020-07-09 08:33:53,-25.000000,133.000000,9059.0,106.0,7575.0,1378.0,35.581723,1.170107,36,AUS
Austria,2020-07-09 08:33:53,47.516200,14.550100,18513.0,706.0,16721.0,1086.0,205.553828,3.813536,40,AUT
Canada,2020-07-09 08:33:53,60.001000,-95.001000,108334.0,8786.0,71805.0,27744.0,286.176175,8.110104,124,CAN
China,2020-07-09 08:33:53,30.592800,114.305500,84950.0,4641.0,79802.0,507.0,6.047657,5.463214,156,CHN
Denmark,2020-07-09 08:33:53,56.263900,9.501800,13101.0,609.0,12202.0,290.0,226.183371,4.648500,208,DNK
...,...,...,...,...,...,...,...,...,...,...,...
West Bank and Gaza,2020-07-09 08:33:53,31.952200,35.233200,5029.0,20.0,494.0,4515.0,98.580473,0.397693,275,PSE
Western Sahara,2020-07-09 08:33:53,24.215500,-12.885800,10.0,1.0,8.0,1.0,1.674116,10.000000,732,ESH
Yemen,2020-07-09 08:33:53,15.552727,48.516388,1318.0,351.0,595.0,372.0,4.418968,26.631259,887,YEM
Zambia,2020-07-09 08:33:53,-13.133897,27.849332,1895.0,42.0,1348.0,505.0,10.307901,2.216359,894,ZMB


## Importing Population Data

In [231]:
population = pd.read_csv('population_data.csv')
population.drop(['Unnamed: 0'], axis = 1, inplace = True)
population.set_index('country', inplace = True)
population

Unnamed: 0_level_0,population,median_age,urban_percentage_share,world_share
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
China,1438207241,38,61 %,18.47 %
India,1377233523,28,35 %,17.70 %
US,330610570,38,83 %,4.25 %
Indonesia,272931713,30,56 %,3.51 %
Pakistan,219992900,23,35 %,2.83 %
...,...,...,...,...
Saint Kitts and Nevis,53123,N.A.,33 %,0.00 %
Monaco,39186,N.A.,N.A.,0.00 %
Liechtenstein,38106,N.A.,15 %,0.00 %
San Marino,33917,N.A.,97 %,0.00 %


## Studying Data

In [232]:
for i in range(0, 185):
    print(population['urban_percentage_share'][i])

61 %
35 %
83 %
56 %
35 %
88 %
52 %
39 %
74 %
84 %
92 %
21 %
47 %
43 %
38 %
46 %
76 %
76 %
76 %
51 %
83 %
82 %
69 %
37 %
67 %
31 %
28 %
82 %
80 %
80 %
26 %
93 %
73 %
35 %
69 %
73 %
25 %
60 %
81 %
64 %
84 %
50 %
79 %
67 %
78 %
38 %
57 %
38 %
21 %
N.A.
39 %
56 %
51 %
86 %
17 %
79 %
18 %
31 %
44 %
55 %
18 %
85 %
58 %
45 %
52 %
63 %
60 %
92 %
49 %
24 %
23 %
47 %
38 %
39 %
18 %
48 %
14 %
70 %
69 %
98 %
57 %
78 %
25 %
85 %
74 %
85 %
91 %
66 %
56 %
88 %
57 %
86 %
72 %
27 %
79 %
57 %
13 %
56 %
93 %
74 %
43 %
43 %
36 %
62 %
76 %
78 %
78 %
57 %
36 %
73 %
N.A.
88 %
86 %
70 %
54 %
83 %
87 %
80 %
80 %
53 %
63 %
43 %
87 %
57 %
68 %
N.A.
58 %
43 %
58 %
63 %
96 %
52 %
67 %
63 %
55 %
96 %
63 %
71 %
55 %
59 %
73 %
87 %
31 %
59 %
55 %
45 %
69 %
89 %
73 %
52 %
68 %
33 %
41 %
67 %
30 %
79 %
59 %
29 %
27 %
46 %
68 %
88 %
87 %
65 %
68 %
35 %
93 %
80 %
46 %
86 %
94 %
31 %
74 %
19 %
35 %
53 %
56 %
26 %
88 %
74 %
33 %
N.A.
15 %
97 %
N.A.


In [233]:
for i in range(0, 185):
    print(population['median_age'][i], " years")

38  years
28  years
38  years
30  years
23  years
33  years
18  years
28  years
40  years
29  years
48  years
19  years
26  years
25  years
32  years
17  years
32  years
32  years
46  years
40  years
40  years
42  years
47  years
18  years
28  years
29  years
20  years
44  years
31  years
45  years
17  years
32  years
29  years
20  years
41  years
21  years
18  years
42  years
41  years
30  years
32  years
28  years
31  years
17  years
30  years
18  years
22  years
20  years
25  years
30  years
20  years
19  years
19  years
38  years
15  years
42  years
34  years
18  years
16  years
43  years
18  years
35  years
31  years
18  years
23  years
28  years
26  years
43  years
19  years
26  years
17  years
17  years
19  years
18  years
20  years
19  years
17  years
33  years
26  years
42  years
24  years
42  years
19  years
28  years
43  years
46  years
24  years
46  years
32  years
41  years
24  years
33  years
43  years
22  years
40  years
43  years
22  years
42  years
30  years
43  years


In [234]:
df.head()

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths
0,Afghanistan,2020-01-22,0,0,0
1,Afghanistan,2020-01-23,0,0,0
2,Afghanistan,2020-01-24,0,0,0
3,Afghanistan,2020-01-25,0,0,0
4,Afghanistan,2020-01-26,0,0,0


In [235]:
df['new_reported_cases'] = [0]*df.shape[0]
df['daily_recovered_cases'] = [0]*df.shape[0]
df['daily_deaths'] = [0]*df.shape[0]
df['active_cases'] = [0]*df.shape[0]
df['cases_growth_rate_in_percent'] = [0]*df.shape[0]

for i in range(1, df.shape[0]):
    df['new_reported_cases'][i] = df['total_cases'][i] -df['total_cases'][i-1] 
    df['daily_recovered_cases'][i] = df['recovered_cases'][i] -df['recovered_cases'][i-1]
    df['daily_deaths'][i] = df['total_deaths'][i] -df['total_deaths'][i-1]
    df['active_cases'][i] = df['total_cases'][i] -df['recovered_cases'][i] - df['total_deaths'][i]
    df['cases_growth_rate_in_percent'][i] = (df['new_reported_cases'][i]/df['total_cases'][i-1])*100

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':
A value is t

In [236]:
days = int(df.shape[0]/185)
indexs1 = []
for i in range(0,185):
    indexs1.append(i*days)


df.loc[indexs1 , 'new_reported_cases'] = 0
df.loc[indexs1 , 'daily_recovered_cases'] = 0
df.loc[indexs1 , 'daily_deaths'] = 0
df.loc[indexs1 , 'cases_growth_rate_in_percent'] = 0
df.head()

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths,new_reported_cases,daily_recovered_cases,daily_deaths,active_cases,cases_growth_rate_in_percent
0,Afghanistan,2020-01-22,0,0,0,0,0,0,0,0.0
1,Afghanistan,2020-01-23,0,0,0,0,0,0,0,
2,Afghanistan,2020-01-24,0,0,0,0,0,0,0,
3,Afghanistan,2020-01-25,0,0,0,0,0,0,0,
4,Afghanistan,2020-01-26,0,0,0,0,0,0,0,


In [237]:
df['mortality_rate'] = df['total_deaths']/df['total_cases']*100
df['recovery_rate'] = df['recovered_cases']/df['total_cases']*100
df = df.fillna(0)

In [238]:
df[df.country == 'India']

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths,new_reported_cases,daily_recovered_cases,daily_deaths,active_cases,cases_growth_rate_in_percent,mortality_rate,recovery_rate
11830,India,2020-01-22,0,0,0,0,0,0,0,0.000000,0.000000,0.000000
11831,India,2020-01-23,0,0,0,0,0,0,0,0.000000,0.000000,0.000000
11832,India,2020-01-24,0,0,0,0,0,0,0,0.000000,0.000000,0.000000
11833,India,2020-01-25,0,0,0,0,0,0,0,0.000000,0.000000,0.000000
11834,India,2020-01-26,0,0,0,0,0,0,0,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
11994,India,2020-07-04,673165,409083,19268,24850,14856,613,244814,3.833013,2.862300,60.770094
11995,India,2020-07-05,697413,424433,19693,24248,15350,425,253287,3.602089,2.823721,60.858200
11996,India,2020-07-06,719664,439934,20159,22251,15501,466,259571,3.190505,2.801168,61.130472
11997,India,2020-07-07,742417,456831,20642,22753,16897,483,264944,3.161614,2.780378,61.532939


In [239]:
#Calulating doubling time
to_remove = []
for i in range(0,185):
    for j in range(0,6):
        to_remove.append((i*days) + j)


r_avg = df['cases_growth_rate_in_percent'].rolling(7).mean()
dbl = np.log(2)/np.log(1 + (r_avg/100))
df['doubling_time_in_days'] = dbl
df.loc[to_remove, 'doubling_time_in_days'] = 0

In [240]:
df[df.country == 'India']

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths,new_reported_cases,daily_recovered_cases,daily_deaths,active_cases,cases_growth_rate_in_percent,mortality_rate,recovery_rate,doubling_time_in_days
11830,India,2020-01-22,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000
11831,India,2020-01-23,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000
11832,India,2020-01-24,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000
11833,India,2020-01-25,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000
11834,India,2020-01-26,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11994,India,2020-07-04,673165,409083,19268,24850,14856,613,244814,3.833013,2.862300,60.770094,20.109419
11995,India,2020-07-05,697413,424433,19693,24248,15350,425,253287,3.602089,2.823721,60.858200,20.171892
11996,India,2020-07-06,719664,439934,20159,22251,15501,466,259571,3.190505,2.801168,61.130472,20.324965
11997,India,2020-07-07,742417,456831,20642,22753,16897,483,264944,3.161614,2.780378,61.532939,20.429991


In [241]:
#adding active cases per million and total cases per million
cases_per_million = []
active_cases_per_million = []
population_share = []

for i in range(0, df.shape[0]):
    cntry = df['country'][i]
    cases = df['total_cases'][i]
    active = df['active_cases'][i]
    population_size = population['population'][cntry]
    share = population['world_share'][cntry]
    
    entry_cases = cases/population_size*1000000
    cases_per_million.append(entry_cases)
    
    entry_active = active/population_size*1000000
    active_cases_per_million.append(entry_active)
    
    population_share.append(share)

df['total_cases_per_million'] = cases_per_million
df['active_cases_per_million'] = active_cases_per_million
df['world_population_share'] = population_share

In [242]:
df[df.country == 'India']

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths,new_reported_cases,daily_recovered_cases,daily_deaths,active_cases,cases_growth_rate_in_percent,mortality_rate,recovery_rate,doubling_time_in_days,total_cases_per_million,active_cases_per_million,world_population_share
11830,India,2020-01-22,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,17.70 %
11831,India,2020-01-23,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,17.70 %
11832,India,2020-01-24,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,17.70 %
11833,India,2020-01-25,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,17.70 %
11834,India,2020-01-26,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,17.70 %
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11994,India,2020-07-04,673165,409083,19268,24850,14856,613,244814,3.833013,2.862300,60.770094,20.109419,488.780580,177.757799,17.70 %
11995,India,2020-07-05,697413,424433,19693,24248,15350,425,253287,3.602089,2.823721,60.858200,20.171892,506.386890,183.909988,17.70 %
11996,India,2020-07-06,719664,439934,20159,22251,15501,466,259571,3.190505,2.801168,61.130472,20.324965,522.543191,188.472758,17.70 %
11997,India,2020-07-07,742417,456831,20642,22753,16897,483,264944,3.161614,2.780378,61.532939,20.429991,539.063991,192.374057,17.70 %


In [243]:
df[df.country == 'US']

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths,new_reported_cases,daily_recovered_cases,daily_deaths,active_cases,cases_growth_rate_in_percent,mortality_rate,recovery_rate,doubling_time_in_days,total_cases_per_million,active_cases_per_million,world_population_share
26195,US,2020-01-22,1,0,0,0,0,0,1,0.000000,0.000000,0.000000,0.000000,0.003025,0.003025,4.25 %
26196,US,2020-01-23,1,0,0,0,0,0,1,0.000000,0.000000,0.000000,0.000000,0.003025,0.003025,4.25 %
26197,US,2020-01-24,2,0,0,1,0,0,2,100.000000,0.000000,0.000000,0.000000,0.006049,0.006049,4.25 %
26198,US,2020-01-25,2,0,0,0,0,0,2,0.000000,0.000000,0.000000,0.000000,0.006049,0.006049,4.25 %
26199,US,2020-01-26,5,0,0,3,0,0,5,150.000000,0.000000,0.000000,0.000000,0.015124,0.015124,4.25 %
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26359,US,2020-07-04,2841241,894325,129689,45880,103921,247,1817227,1.641291,4.564520,31.476563,39.171911,8593.920636,5496.578648,4.25 %
26360,US,2020-07-05,2891124,906763,129960,49883,12438,271,1854401,1.755676,4.495138,31.363684,38.558320,8744.802079,5609.019095,4.25 %
26361,US,2020-07-06,2936077,924148,130285,44953,17385,325,1881644,1.554862,4.437384,31.475605,38.764367,8880.771719,5691.421179,4.25 %
26362,US,2020-07-07,2996098,936476,131480,60021,12328,1195,1928142,2.044258,4.388374,31.256521,37.935536,9062.317639,5832.063990,4.25 %


In [244]:
num_country = len(df.country.unique())
num_dates = len(df.date.unique())

country_list = np.array(df.country.unique())
date_list = df.date.unique()

total_case_share = []
active_case_share = []
death_share = []
recovered_case_share = []


total_case_share_by_date = []
active_case_share_by_date = []
death_share_by_date = []
recovered_case_share_by_date = []

for i  in date_list:
    
    total_day_case = df[df.date == i].sum()['total_cases']
    active_day_case = df[df.date == i].sum()['active_cases']
    death_day_case = df[df.date == i].sum()['total_deaths']
    recovered_day_case = df[df.date == i].sum()['recovered_cases']
    
    total_case_share_by_date.append(total_day_case)
    active_case_share_by_date.append(active_day_case)
    death_share_by_date.append(death_day_case)
    recovered_case_share_by_date.append(recovered_day_case)
    
for j in country_list:
    idx = df[df.country == j].index
    
    tot = np.array(df['total_cases'][idx])/total_case_share_by_date*100
    act = np.array(df['active_cases'][idx])/active_case_share_by_date*100
    dea = np.array(df['total_deaths'][idx])/death_share_by_date*100
    reco = np.array(df['recovered_cases'][idx])/recovered_case_share_by_date*100
    
    total_case_share = np.concatenate([total_case_share,tot])
    active_case_share = np.concatenate([active_case_share, act])
    death_share = np.concatenate([death_share, dea])
    recovered_case_share = np.concatenate([recovered_case_share,reco])

df['world_total_cases_share'] = total_case_share
df['world_active_cases_share'] = active_case_share
df['world_deaths_share'] = death_share
df['world_recovered_cases_share'] = recovered_case_share
    

In [245]:
df[df.country == 'India']

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths,new_reported_cases,daily_recovered_cases,daily_deaths,active_cases,cases_growth_rate_in_percent,mortality_rate,recovery_rate,doubling_time_in_days,total_cases_per_million,active_cases_per_million,world_population_share,world_total_cases_share,world_active_cases_share,world_deaths_share,world_recovered_cases_share
11830,India,2020-01-22,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,17.70 %,0.000000,0.000000,0.000000,0.000000
11831,India,2020-01-23,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,17.70 %,0.000000,0.000000,0.000000,0.000000
11832,India,2020-01-24,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,17.70 %,0.000000,0.000000,0.000000,0.000000
11833,India,2020-01-25,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,17.70 %,0.000000,0.000000,0.000000,0.000000
11834,India,2020-01-26,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,17.70 %,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11994,India,2020-07-04,673165,409083,19268,24850,14856,613,244814,3.833013,2.862300,60.770094,20.109419,488.780580,177.757799,17.70 %,5.975547,5.233948,3.630697,6.753657
11995,India,2020-07-05,697413,424433,19693,24248,15350,425,253287,3.602089,2.823721,60.858200,20.171892,506.386890,183.909988,17.70 %,6.091944,5.346496,3.686454,6.871761
11996,India,2020-07-06,719664,439934,20159,22251,15501,466,259571,3.190505,2.801168,61.130472,20.324965,522.543191,188.472758,17.70 %,6.195525,5.432827,3.747249,6.982998
11997,India,2020-07-07,742417,456831,20642,22753,16897,483,264944,3.161614,2.780378,61.532939,20.429991,539.063991,192.374057,17.70 %,6.278273,5.478521,3.794004,7.088081


In [249]:
df[df.country == 'US']

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths,new_reported_cases,daily_recovered_cases,daily_deaths,active_cases,cases_growth_rate_in_percent,mortality_rate,recovery_rate,doubling_time_in_days,total_cases_per_million,active_cases_per_million,world_population_share,world_total_cases_share,world_active_cases_share,world_deaths_share,world_recovered_cases_share
26195,US,2020-01-22,1,0,0,0,0,0,1,0.000000,0.000000,0.000000,0.000000,0.003025,0.003025,4.25 %,0.180180,0.196078,0.000000,0.000000
26196,US,2020-01-23,1,0,0,0,0,0,1,0.000000,0.000000,0.000000,0.000000,0.003025,0.003025,4.25 %,0.152905,0.165017,0.000000,0.000000
26197,US,2020-01-24,2,0,0,1,0,0,2,100.000000,0.000000,0.000000,0.000000,0.006049,0.006049,4.25 %,0.212540,0.227531,0.000000,0.000000
26198,US,2020-01-25,2,0,0,0,0,0,2,0.000000,0.000000,0.000000,0.000000,0.006049,0.006049,4.25 %,0.139470,0.147820,0.000000,0.000000
26199,US,2020-01-26,5,0,0,3,0,0,5,150.000000,0.000000,0.000000,0.000000,0.015124,0.015124,4.25 %,0.236072,0.248756,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26359,US,2020-07-04,2841241,894325,129689,45880,103921,247,1817227,1.641291,4.564520,31.476563,39.171911,8593.920636,5496.578648,4.25 %,25.221110,38.851013,24.437485,14.764643
26360,US,2020-07-05,2891124,906763,129960,49883,12438,271,1854401,1.755676,4.495138,31.363684,38.558320,8744.802079,5609.019095,4.25 %,25.254140,39.143533,24.328013,14.680900
26361,US,2020-07-06,2936077,924148,130285,44953,17385,325,1881644,1.554862,4.437384,31.475605,38.764367,8880.771719,5691.421179,4.25 %,25.276434,39.382849,24.217983,14.668846
26362,US,2020-07-07,2996098,936476,131480,60021,12328,1195,1928142,2.044258,4.388374,31.256521,37.935536,9062.317639,5832.063990,4.25 %,25.336600,39.870183,24.166052,14.530139


In [250]:
df[df.country == 'Russia']

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths,new_reported_cases,daily_recovered_cases,daily_deaths,active_cases,cases_growth_rate_in_percent,mortality_rate,recovery_rate,doubling_time_in_days,total_cases_per_million,active_cases_per_million,world_population_share,world_total_cases_share,world_active_cases_share,world_deaths_share,world_recovered_cases_share
20787,Russia,2020-01-22,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.87 %,0.000000,0.000000,0.000000,0.000000
20788,Russia,2020-01-23,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.87 %,0.000000,0.000000,0.000000,0.000000
20789,Russia,2020-01-24,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.87 %,0.000000,0.000000,0.000000,0.000000
20790,Russia,2020-01-25,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.87 %,0.000000,0.000000,0.000000,0.000000
20791,Russia,2020-01-26,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.87 %,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20951,Russia,2020-07-04,673564,446127,10011,6623,8972,167,217426,0.993041,1.486273,66.233795,67.399352,4615.917777,1490.015111,1.87 %,5.979089,4.648412,1.886387,7.365226
20952,Russia,2020-07-05,680283,449995,10145,6719,3868,134,220143,0.997530,1.491291,66.148206,68.194761,4661.962921,1508.634647,1.87 %,5.942312,4.646878,1.899105,7.285621
20953,Russia,2020-07-06,686852,453570,10280,6569,3575,135,223002,0.965628,1.496683,66.036060,69.051764,4706.980119,1528.227304,1.87 %,5.913050,4.667437,1.910894,7.199440
20954,Russia,2020-07-07,693215,463103,10478,6363,9533,198,219634,0.926400,1.511508,66.805104,70.213425,4750.585604,1505.146482,1.87 %,5.862195,4.541599,1.925859,7.185396


In [251]:
df[df.country == 'Brazil']

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths,new_reported_cases,daily_recovered_cases,daily_deaths,active_cases,cases_growth_rate_in_percent,mortality_rate,recovery_rate,doubling_time_in_days,total_cases_per_million,active_cases_per_million,world_population_share,world_total_cases_share,world_active_cases_share,world_deaths_share,world_recovered_cases_share
3549,Brazil,2020-01-22,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,2.73 %,0.000000,0.000000,0.000000,0.000000
3550,Brazil,2020-01-23,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,2.73 %,0.000000,0.000000,0.000000,0.000000
3551,Brazil,2020-01-24,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,2.73 %,0.000000,0.000000,0.000000,0.000000
3552,Brazil,2020-01-25,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,2.73 %,0.000000,0.000000,0.000000,0.000000
3553,Brazil,2020-01-26,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,2.73 %,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3713,Brazil,2020-07-04,1577004,990731,64265,37923,6116,1091,522008,2.464003,4.075132,62.823620,26.543911,7429.826130,2459.365150,2.73 %,13.998739,11.160158,12.109547,16.356235
3714,Brazil,2020-07-05,1603055,1029045,64867,26051,38314,602,509143,1.651930,4.046461,64.192744,27.524275,7552.561646,2398.753564,2.73 %,14.002781,10.747220,12.142853,16.660700
3715,Brazil,2020-07-06,1623284,1062542,65487,20229,33497,620,495255,1.261903,4.034229,65.456322,28.351856,7647.867652,2333.322262,2.73 %,13.974712,10.365698,12.173029,16.865551
3716,Brazil,2020-07-07,1668589,1107012,66741,45305,44470,1254,494836,2.790947,3.999847,66.344199,27.848304,7861.315604,2331.348204,2.73 %,14.110477,10.232235,12.267010,17.176134


In [252]:
df[df.country == 'China']

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths,new_reported_cases,daily_recovered_cases,daily_deaths,active_cases,cases_growth_rate_in_percent,mortality_rate,recovery_rate,doubling_time_in_days,total_cases_per_million,active_cases_per_million,world_population_share,world_total_cases_share,world_active_cases_share,world_deaths_share,world_recovered_cases_share
5408,China,2020-01-22,548,28,17,0,0,0,503,0.000000,3.102190,5.109489,0.000000,0.381030,0.349741,18.47 %,98.738739,98.627451,100.000000,100.000000
5409,China,2020-01-23,643,30,18,95,2,1,595,17.335766,2.799378,4.665630,0.000000,0.447084,0.413710,18.47 %,98.318043,98.184818,100.000000,100.000000
5410,China,2020-01-24,920,36,26,277,6,8,858,43.079316,2.826087,3.913043,0.000000,0.639685,0.596576,18.47 %,97.768332,97.610922,100.000000,100.000000
5411,China,2020-01-25,1406,39,42,486,3,16,1325,52.826087,2.987198,2.773826,0.000000,0.977606,0.921286,18.47 %,98.047420,97.930525,100.000000,100.000000
5412,China,2020-01-26,2075,49,56,669,10,14,1970,47.581792,2.698795,2.361446,0.000000,1.442768,1.369761,18.47 %,97.969783,98.009950,100.000000,94.230769
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5572,China,2020-07-04,84857,79706,4641,19,26,0,510,0.022396,5.469201,93.929788,3609.140226,59.001928,0.354608,18.47 %,0.753258,0.010903,0.874510,1.315887
5573,China,2020-07-05,84871,79718,4641,14,12,0,512,0.016498,5.468299,93.928433,3609.736044,59.011662,0.355999,18.47 %,0.741353,0.010808,0.868777,1.290670
5574,China,2020-07-06,84889,79725,4641,18,7,0,523,0.021209,5.467139,93.916762,3776.237935,59.024178,0.363647,18.47 %,0.730802,0.010946,0.862691,1.265462
5575,China,2020-07-07,84917,79754,4641,28,29,0,522,0.032984,5.465337,93.919945,3118.880618,59.043647,0.362952,18.47 %,0.718103,0.010794,0.853017,1.237444


In [254]:
df[df.country == 'United Kingdom']

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths,new_reported_cases,daily_recovered_cases,daily_deaths,active_cases,cases_growth_rate_in_percent,mortality_rate,recovery_rate,doubling_time_in_days,total_cases_per_million,active_cases_per_million,world_population_share,world_total_cases_share,world_active_cases_share,world_deaths_share,world_recovered_cases_share
25857,United Kingdom,2020-01-22,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.87 %,0.000000,0.000000,0.000000,0.000000
25858,United Kingdom,2020-01-23,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.87 %,0.000000,0.000000,0.000000,0.000000
25859,United Kingdom,2020-01-24,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.87 %,0.000000,0.000000,0.000000,0.000000
25860,United Kingdom,2020-01-25,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.87 %,0.000000,0.000000,0.000000,0.000000
25861,United Kingdom,2020-01-26,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.87 %,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26021,United Kingdom,2020-07-04,286412,1375,44283,625,0,67,240754,0.218694,15.461294,0.480078,526.437750,4223.487570,3550.205740,0.87 %,2.542420,5.147148,8.344309,0.022700
26022,United Kingdom,2020-07-05,286931,1375,44305,519,0,22,241251,0.181207,15.440995,0.479209,505.730373,4231.140846,3557.534600,0.87 %,2.506359,5.092435,8.293726,0.022262
26023,United Kingdom,2020-07-06,287290,1375,44321,359,0,16,241594,0.125117,15.427269,0.478610,559.965999,4236.434731,3562.592545,0.87 %,2.473255,5.056568,8.238594,0.021825
26024,United Kingdom,2020-07-07,287874,1375,44476,584,0,155,242023,0.203279,15.449815,0.477640,522.938892,4245.046509,3568.918663,0.87 %,2.434416,5.004560,8.174698,0.021334


In [255]:
df[df.country == 'Italy']

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths,new_reported_cases,daily_recovered_cases,daily_deaths,active_cases,cases_growth_rate_in_percent,mortality_rate,recovery_rate,doubling_time_in_days,total_cases_per_million,active_cases_per_million,world_population_share,world_total_cases_share,world_active_cases_share,world_deaths_share,world_recovered_cases_share
12844,Italy,2020-01-22,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.78 %,0.000000,0.000000,0.000000,0.000000
12845,Italy,2020-01-23,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.78 %,0.000000,0.000000,0.000000,0.000000
12846,Italy,2020-01-24,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.78 %,0.000000,0.000000,0.000000,0.000000
12847,Italy,2020-01-25,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.78 %,0.000000,0.000000,0.000000,0.000000
12848,Italy,2020-01-26,0,0,0,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.78 %,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13008,Italy,2020-07-04,241419,191944,34854,235,477,21,14621,0.097436,14.437140,79.506584,910.552577,3991.754287,241.751641,0.78 %,2.143027,0.312587,6.567589,3.168853
13009,Italy,2020-07-05,241611,192108,34861,192,164,7,14642,0.079530,14.428565,79.511281,898.637298,3994.928920,242.098867,0.78 %,2.110486,0.309070,6.525845,3.110315
13010,Italy,2020-07-06,241819,192241,34869,208,133,8,14709,0.086089,14.419462,79.497889,845.947315,3998.368106,243.206681,0.78 %,2.081799,0.307860,6.481612,3.051409
13011,Italy,2020-07-07,241956,192815,34899,137,574,30,14242,0.056654,14.423697,79.690109,849.507109,4000.633339,235.485047,0.78 %,2.046109,0.294497,6.414444,2.991672


In [246]:
# Latest Day Data
ts = df['date'][df.shape[0]-1]
last_day_df = pd.DataFrame(df[df.date == ts])

In [247]:
last_day_df

Unnamed: 0,country,date,total_cases,recovered_cases,total_deaths,new_reported_cases,daily_recovered_cases,daily_deaths,active_cases,cases_growth_rate_in_percent,mortality_rate,recovery_rate,doubling_time_in_days,total_cases_per_million,active_cases_per_million,world_population_share,world_total_cases_share,world_active_cases_share,world_deaths_share,world_recovered_cases_share
168,Afghanistan,2020-07-08,33594,20700,936,210,521,16,11958,0.629044,2.786212,61.618146,90.252416,867.100565,308.650013,0.50 %,0.279092,0.243870,0.170377,0.314395
337,Albania,2020-07-08,3106,1791,83,68,47,2,1232,2.238315,2.672247,57.662589,26.142751,1079.064209,428.012590,0.04 %,0.025804,0.025125,0.015108,0.027202
506,Algeria,2020-07-08,17348,12329,978,469,235,10,4041,2.778601,5.637537,71.068711,24.859373,397.110097,92.501839,0.56 %,0.144124,0.082412,0.178022,0.187255
675,Andorra,2020-07-08,855,802,52,0,2,0,1,0.000000,6.081871,93.801170,inf,11069.394096,12.946660,0.00 %,0.007103,0.000020,0.009465,0.012181
844,Angola,2020-07-08,386,117,21,0,0,0,248,0.000000,5.440415,30.310881,16.834852,11.824248,7.596926,0.42 %,0.003207,0.005058,0.003823,0.001777
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30588,Sao Tome and Principe,2020-07-08,724,283,13,0,4,0,428,0.000000,1.795580,39.088398,387.666078,3316.415340,1960.532825,0.00 %,0.006015,0.008729,0.002366,0.004298
30757,Yemen,2020-07-08,1318,595,351,21,4,3,372,1.619121,26.631259,45.144158,47.443479,44.396217,12.530647,0.38 %,0.010950,0.007587,0.063891,0.009037
30926,Comoros,2020-07-08,313,272,7,2,6,0,34,0.643087,2.236422,86.900958,148.702207,361.558792,39.274757,0.01 %,0.002600,0.000693,0.001274,0.004131
31095,Tajikistan,2020-07-08,6364,5011,54,49,46,1,1299,0.775930,0.848523,78.739786,83.463607,670.435178,136.847155,0.12 %,0.052871,0.026492,0.009829,0.076108


In [248]:
df.to_csv(r'covid_time_series.csv')