# Import Packeges

In [0]:
import os
import pandas as pd
from pandas_profiling import ProfileReport

# Set Directories and Pathes

In [13]:
current_dir = os.getcwd()
working_dir = os.path.join(current_dir, 'drive/My Drive/MyCOVID19/forecasting-rates')
dataset_dir = os.path.join(working_dir, 'COVID-19/csse_covid_19_data/csse_covid_19_time_series')
print('current_dir: ', current_dir)
print('working_dir: ', working_dir)
print('dataset_dir: ', dataset_dir)

current_dir:  /content
working_dir:  /content/drive/My Drive/COVID19/forecasting-rates
dataset_dir:  /content/drive/My Drive/COVID19/forecasting-rates/COVID-19/csse_covid_19_data/csse_covid_19_time_series


In [14]:
confirmed_path = os.path.join(dataset_dir, 'time_series_19-covid-Confirmed.csv')
deaths_path = os.path.join(dataset_dir, 'time_series_19-covid-Deaths.csv')
recovered_path = os.path.join(dataset_dir, 'time_series_19-covid-Recovered.csv')
print('confirmed_path: ', confirmed_path)
print('deaths_path: ', deaths_path)
print('recovered_path: ', recovered_path)

confirmed_path:  /content/drive/My Drive/COVID19/forecasting-rates/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv
deaths_path:  /content/drive/My Drive/COVID19/forecasting-rates/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv
recovered_path:  /content/drive/My Drive/COVID19/forecasting-rates/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv


# Load Data

In [0]:
confirmed_df = pd.read_csv(confirmed_path, index_col=False)
deaths_df = pd.read_csv(deaths_path, index_col=False)
recovered_df = pd.read_csv(recovered_path, index_col=False)

Land = [country if str(state) == 'nan' else country+' '+str(state) for state, country in zip(confirmed_df['Province/State'], confirmed_df['Country/Region'])]

confirmed_df.drop(columns=['Lat', 'Long', 'Province/State', 'Country/Region'], inplace=True)
deaths_df.drop(columns=['Lat', 'Long', 'Province/State', 'Country/Region'], inplace=True)
recovered_df.drop(columns=['Lat', 'Long', 'Province/State', 'Country/Region'], inplace=True)

confirmed_df.insert(0, 'Land', Land)
deaths_df.insert(0, 'Land', Land)
recovered_df.insert(0, 'Land', Land)

In [78]:
confirmed_df.head()

Unnamed: 0,Land,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20
0,Thailand,2,3,5,7,8,8,14,14,14,19,19,19,19,25,25,25,25,32,32,32,33,33,33,33,33,34,35,35,35,35,35,35,35,35,37,40,40,41,42,42,43,43,43,47,48,50,50,50,53,59,70,75,82,114,147,177,212,272,322
1,Japan,2,1,2,2,4,4,7,7,11,15,20,20,20,22,22,45,25,25,26,26,26,28,28,29,43,59,66,74,84,94,105,122,147,159,170,189,214,228,241,256,274,293,331,360,420,461,502,511,581,639,639,701,773,839,825,878,889,924,963
2,Singapore,0,1,3,3,4,5,7,7,10,13,16,18,18,24,28,28,30,33,40,45,47,50,58,67,72,75,77,81,84,84,85,85,89,89,91,93,93,93,102,106,108,110,110,117,130,138,150,150,160,178,178,200,212,226,243,266,313,345,385
3,Nepal,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
4,Malaysia,0,0,0,3,4,4,4,7,8,8,8,8,8,10,12,12,12,16,16,18,18,18,19,19,22,22,22,22,22,22,22,22,22,22,22,22,23,23,25,29,29,36,50,50,83,93,99,117,129,149,149,197,238,428,566,673,790,900,1030


In [0]:
pd_profiler = False
if pd_profiler:
  profile_confirmed = ProfileReport(confirmed_df)
  profile_deaths = ProfileReport(deaths_df)
  profile_recovered = ProfileReport(recovered_df)

  profile_confirmed.to_file(outputfile=os.path.join(working_dir, "Profiling for Confirmed Cases.html"))
  profile_deaths.to_file(outputfile=os.path.join(working_dir, "Profiling for Deaths Cases.html"))
  profile_recovered.to_file(outputfile=os.path.join(working_dir, "Profiling for Recovered Cases.html"))

In [95]:
confirmed_df.describe()

Unnamed: 0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20
count,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0,477.0
mean,1.163522,1.368973,1.972746,3.006289,4.440252,6.136268,11.69392,12.926625,17.262055,20.811321,25.236897,35.192872,41.679245,50.08805,57.93501,64.60587,72.098532,77.819706,84.171908,89.647799,93.924528,94.802935,126.557652,140.220126,144.716981,149.316562,153.580713,157.51782,158.572327,159.742138,161.054507,164.735849,165.545073,166.809224,168.580713,170.639413,173.48847,176.352201,180.316562,185.259958,189.320755,194.633124,199.412998,205.203354,213.383648,221.84696,230.178197,238.073375,248.620545,263.867925,269.062893,304.387841,327.24109,351.039832,380.559748,413.295597,450.545073,508.821803,570.578616
std,20.38048,20.464206,25.491333,35.494318,49.618662,66.864314,163.876281,164.945181,227.271532,269.471093,331.577197,515.072476,622.609792,767.274479,904.122911,1016.325938,1146.472447,1244.878294,1360.720911,1456.72017,1531.734149,1532.030257,2209.988387,2493.533181,2577.905924,2666.372859,2749.061774,2826.532307,2842.533525,2861.338461,2871.494167,2936.551256,2936.615508,2945.986552,2968.880918,2987.421934,3006.646342,3022.033986,3042.970058,3070.927907,3082.196253,3091.665639,3100.411576,3111.191247,3124.436962,3137.778896,3150.438227,3165.342795,3177.770174,3203.250317,3210.902126,3282.16446,3343.813157,3415.121855,3499.569759,3597.937557,3731.764976,3926.788893,4163.404218
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,2.0,3.0,4.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,2.0,3.0,5.0,5.0,7.0,10.0,13.0,18.0,26.0,32.0,45.0,60.0,71.0,86.0,96.0
max,444.0,444.0,549.0,761.0,1058.0,1423.0,3554.0,3554.0,4903.0,5806.0,7153.0,11177.0,13522.0,16678.0,19665.0,22112.0,24953.0,27100.0,29631.0,31728.0,33366.0,33366.0,48206.0,54406.0,56249.0,58182.0,59989.0,61682.0,62031.0,62442.0,62662.0,64084.0,64084.0,64287.0,64786.0,65187.0,65596.0,65914.0,66337.0,66907.0,67103.0,67217.0,67332.0,67466.0,67592.0,67666.0,67707.0,67743.0,67760.0,67773.0,67781.0,67786.0,67790.0,67794.0,67798.0,67799.0,67800.0,67800.0,67800.0


In [118]:
num_total_lands, num_total_days = confirmed_df.shape
print('num_total_lands: {}, num_total_days: {} '.format(num_total_lands, num_total_days))

num_total_lands: 477, num_total_days: 60 


In [98]:
sum_confirmed_per_land = confirmed_df.sum(axis=1)
sum_confirmed_per_land.describe()

count    4.770000e+02
mean     9.551065e+03
std      1.248171e+05
min      0.000000e+00
25%      2.000000e+00
50%      3.100000e+01
75%      5.510000e+02
max      2.691485e+06
dtype: float64

In [0]:
quantile_sum_confirmed_per_land = sum_confirmed_per_land.quantile(q=0.98)

In [0]:
confirmed = confirmed_df[sum_confirmed_per_land > quantile_sum_confirmed_per_land]

In [126]:
num_lands, num_days = confirmed.shape
print('num_lands: {}, num_days: {} '.format(num_lands, num_days))

num_lands: 10, num_days: 60 


In [0]:
base_land = 'China'
sad_land = 'Italy'
good_land = 'South Koera'