# Import Packeges

In [0]:
import os
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pandas_profiling import ProfileReport

# Set Directories and Pathes

In [2]:
current_dir = os.getcwd()
working_dir = os.path.join(current_dir, 'drive/My Drive/MyCOVID19/forecasting-rates')
dataset_dir = os.path.join(working_dir, 'COVID-19/csse_covid_19_data/csse_covid_19_time_series')
print('current_dir: ', current_dir)
print('working_dir: ', working_dir)
print('dataset_dir: ', dataset_dir)

current_dir:  /content
working_dir:  /content/drive/My Drive/MyCOVID19/forecasting-rates
dataset_dir:  /content/drive/My Drive/MyCOVID19/forecasting-rates/COVID-19/csse_covid_19_data/csse_covid_19_time_series


In [0]:
path_tree = 

In [3]:
confirmed_path = os.path.join(dataset_dir, 'time_series_19-covid-Confirmed.csv')
deceased_path = os.path.join(dataset_dir, 'time_series_19-covid-Deaths.csv')
recovered_path = os.path.join(dataset_dir, 'time_series_19-covid-Recovered.csv')
print('confirmed_path: ', confirmed_path)
print('deceased_path: ', deceased_path)
print('recovered_path: ', recovered_path)

confirmed_path:  /content/drive/My Drive/MyCOVID19/forecasting-rates/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv
deceased_path:  /content/drive/My Drive/MyCOVID19/forecasting-rates/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv
recovered_path:  /content/drive/My Drive/MyCOVID19/forecasting-rates/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv


# Load Data

In [0]:
confirmed_df = pd.read_csv(confirmed_path, index_col=False)
deceased_df = pd.read_csv(deceased_path, index_col=False)
recovered_df = pd.read_csv(recovered_path, index_col=False)

Land = [country if str(state) == 'nan' else country+' '+str(state) for state, country in zip(confirmed_df['Province/State'], confirmed_df['Country/Region'])]

confirmed_df.drop(columns=['Lat', 'Long', 'Province/State', 'Country/Region'], inplace=True)
deceased_df.drop(columns=['Lat', 'Long', 'Province/State', 'Country/Region'], inplace=True)
recovered_df.drop(columns=['Lat', 'Long', 'Province/State', 'Country/Region'], inplace=True)

confirmed_df.insert(0, 'Land', Land)
deceased_df.insert(0, 'Land', Land)
recovered_df.insert(0, 'Land', Land)

In [5]:
confirmed_df.head()

Unnamed: 0,Land,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20
0,Thailand,2,3,5,7,8,8,14,14,14,19,19,19,19,25,25,25,25,32,32,32,33,33,33,33,33,34,35,35,35,35,35,35,35,35,37,40,40,41,42,42,43,43,43,47,48,50,50,50,53,59,70,75,82,114,147,177,212,272,322,411
1,Japan,2,1,2,2,4,4,7,7,11,15,20,20,20,22,22,45,25,25,26,26,26,28,28,29,43,59,66,74,84,94,105,122,147,159,170,189,214,228,241,256,274,293,331,360,420,461,502,511,581,639,639,701,773,839,825,878,889,924,963,1007
2,Singapore,0,1,3,3,4,5,7,7,10,13,16,18,18,24,28,28,30,33,40,45,47,50,58,67,72,75,77,81,84,84,85,85,89,89,91,93,93,93,102,106,108,110,110,117,130,138,150,150,160,178,178,200,212,226,243,266,313,345,385,432
3,Nepal,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
4,Malaysia,0,0,0,3,4,4,4,7,8,8,8,8,8,10,12,12,12,16,16,18,18,18,19,19,22,22,22,22,22,22,22,22,22,22,22,22,23,23,25,29,29,36,50,50,83,93,99,117,129,149,149,197,238,428,566,673,790,900,1030,1183


In [0]:
pd_profiler = False
if pd_profiler:
  profile_confirmed = ProfileReport(confirmed_df)
  profile_deceased = ProfileReport(deceased_df)
  profile_recovered = ProfileReport(recovered_df)

  profile_confirmed.to_file(outputfile=os.path.join(working_dir, "Profiling for Confirmed Cases.html"))
  profile_deceased.to_file(outputfile=os.path.join(working_dir, "Profiling for Deceased Cases.html"))
  profile_recovered.to_file(outputfile=os.path.join(working_dir, "Profiling for Recovered Cases.html"))

In [7]:
confirmed_df.describe()

Unnamed: 0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20
count,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0,482.0
mean,1.151452,1.354772,1.952282,2.975104,4.394191,6.072614,11.572614,12.792531,17.082988,20.595436,24.975104,34.827801,41.246888,49.568465,57.334025,63.935685,71.350622,77.012448,83.298755,88.717842,92.950207,93.819502,125.244813,138.76556,143.215768,147.767635,151.987552,155.883817,156.927386,158.085062,159.383817,163.026971,163.827801,165.078838,166.83195,168.869295,171.688797,174.522822,178.446058,183.338174,187.356846,192.614108,197.344398,203.074689,211.170124,219.545643,227.790456,235.603734,246.041494,261.130705,266.271784,301.23029,323.846473,347.39834,376.612033,409.008299,445.871369,503.543568,564.659751,631.792531
std,20.274619,20.358039,25.359285,35.310671,49.36215,66.51879,163.026623,164.090876,226.09398,268.075168,329.859252,512.400824,619.379747,763.293055,899.430646,1011.051013,1140.521528,1238.416283,1353.657003,1449.157606,1523.781934,1524.077053,2198.509428,2480.579942,2564.514248,2652.521403,2734.780549,2811.8484,2827.766586,2846.4739,2856.577279,2921.296427,2921.360813,2930.683608,2953.459328,2971.904626,2991.030218,3006.339127,3027.168136,3054.982833,3066.1948,3075.618067,3084.32139,3095.048551,3108.230612,3121.508769,3134.107986,3148.940514,3161.311123,3186.6703,3194.286424,3265.206784,3326.553853,3397.511937,3481.547181,3579.43387,3712.599681,3906.666957,4142.112667,4420.2962
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,2.0,3.0,3.0,4.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,2.0,3.0,4.0,5.0,7.0,9.0,12.0,17.75,26.0,32.0,40.5,57.5,69.5,84.0,93.75,127.5
max,444.0,444.0,549.0,761.0,1058.0,1423.0,3554.0,3554.0,4903.0,5806.0,7153.0,11177.0,13522.0,16678.0,19665.0,22112.0,24953.0,27100.0,29631.0,31728.0,33366.0,33366.0,48206.0,54406.0,56249.0,58182.0,59989.0,61682.0,62031.0,62442.0,62662.0,64084.0,64084.0,64287.0,64786.0,65187.0,65596.0,65914.0,66337.0,66907.0,67103.0,67217.0,67332.0,67466.0,67592.0,67666.0,67707.0,67743.0,67760.0,67773.0,67781.0,67786.0,67790.0,67794.0,67798.0,67799.0,67800.0,67800.0,67800.0,67800.0


In [8]:
num_total_lands, num_total_days = confirmed_df.shape
print('num_total_lands: {}, num_total_days: {} '.format(num_total_lands, num_total_days))

num_total_lands: 482, num_total_days: 61 


In [9]:
sum_confirmed_per_land = confirmed_df.sum(axis=1)
sum_confirmed_per_land.describe()

count    4.820000e+02
mean     1.008378e+04
std      1.277025e+05
min      0.000000e+00
25%      2.000000e+00
50%      3.500000e+01
75%      7.122500e+02
max      2.759285e+06
dtype: float64

In [0]:
quantile_sum_confirmed_per_land = sum_confirmed_per_land.quantile(q=0.98)

In [0]:
confirmed = confirmed_df[sum_confirmed_per_land > quantile_sum_confirmed_per_land]
deceased = deceased_df[sum_confirmed_per_land > quantile_sum_confirmed_per_land]
recovered = recovered_df[sum_confirmed_per_land > quantile_sum_confirmed_per_land]

In [12]:
confirmed.head()

Unnamed: 0,Land,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20
11,Germany,0,0,0,0,0,1,4,4,4,5,8,10,12,12,12,12,13,13,14,14,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,27,46,48,79,130,159,196,262,482,670,799,1040,1176,1457,1908,2078,3675,4585,5795,7272,9257,12327,15320,19848,22213
16,Italy,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,20,62,155,229,322,453,655,888,1128,1694,2036,2502,3089,3858,4636,5883,7375,9172,10149,12462,12462,17660,21157,24747,27980,31506,35713,41035,47021,53578
18,Spain,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,6,13,15,32,45,84,120,165,222,259,400,500,673,1073,1695,2277,2277,5232,6391,7798,9942,11748,13910,17963,20410,25374
154,China Hubei,444,444,549,761,1058,1423,3554,3554,4903,5806,7153,11177,13522,16678,19665,22112,24953,27100,29631,31728,33366,33366,48206,54406,56249,58182,59989,61682,62031,62442,62662,64084,64084,64287,64786,65187,65596,65914,66337,66907,67103,67217,67332,67466,67592,67666,67707,67743,67760,67773,67781,67786,67790,67794,67798,67799,67800,67800,67800,67800
155,Iran,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,5,18,28,43,61,95,139,245,388,593,978,1501,2336,2922,3513,4747,5823,6566,7161,8042,9000,10075,11364,12729,13938,14991,16169,17361,18407,19644,20610


In [13]:
num_lands, num_days = confirmed.iloc[:, 1:].shape
print('num_lands: {}, num_days: {} '.format(num_lands, num_days))

land_names = confirmed['Land']
print('land_names:\n', land_names)

date_names = confirmed.columns
print('date_names:\n', date_names)

num_lands: 10, num_days: 60 
land_names:
 11             Germany
16               Italy
18               Spain
154        China Hubei
155               Iran
156       Korea, South
157      France France
158    China Guangdong
159        China Henan
160     China Zhejiang
Name: Land, dtype: object
date_names:
 Index(['Land', '1/22/20', '1/23/20', '1/24/20', '1/25/20', '1/26/20',
       '1/27/20', '1/28/20', '1/29/20', '1/30/20', '1/31/20', '2/1/20',
       '2/2/20', '2/3/20', '2/4/20', '2/5/20', '2/6/20', '2/7/20', '2/8/20',
       '2/9/20', '2/10/20', '2/11/20', '2/12/20', '2/13/20', '2/14/20',
       '2/15/20', '2/16/20', '2/17/20', '2/18/20', '2/19/20', '2/20/20',
       '2/21/20', '2/22/20', '2/23/20', '2/24/20', '2/25/20', '2/26/20',
       '2/27/20', '2/28/20', '2/29/20', '3/1/20', '3/2/20', '3/3/20', '3/4/20',
       '3/5/20', '3/6/20', '3/7/20', '3/8/20', '3/9/20', '3/10/20', '3/11/20',
       '3/12/20', '3/13/20', '3/14/20', '3/15/20', '3/16/20', '3/17/20',
       '3/18/20', '3

In [0]:
base_land = 'China'
sad_land = 'Italy'
good_land = 'South Koera'

# Visualizations for the most affected countries

In [0]:
def bar_plot(xvalue, yticks, title):
  fig = go.Figure(
    data=[go.Bar(x=xvalue, y=yticks, orientation='h')],
    layout_title_text="A Figure Displayed with fig.show()")
  
  fig.update_layout(
      title={
          'text': title,
          'y':0.9,
          'x':0.5,
          'xanchor': 'center',
          'yanchor': 'top'})
  fig.show()

In [16]:
sum_confirmed_per_land = pd.DataFrame({'Land': confirmed['Land'], 'Sum':confirmed.sum(axis=1)})
sum_confirmed_per_land.sort_values(by=['Sum'], ascending=True, inplace=True)
bar_plot(sum_confirmed_per_land['Sum'], sum_confirmed_per_land['Land'], 'Bar plot of the total confirmed cases per country')

In [17]:
sum_deceased_per_land = pd.DataFrame({'Land': deceased['Land'], 'Sum':deceased.sum(axis=1)})
sum_deceased_per_land.sort_values(by=['Sum'], ascending=True, inplace=True)
bar_plot(sum_deceased_per_land['Sum'], sum_deceased_per_land['Land'], 'Bar plot of the total deceased cases per country')

In [18]:
sum_recovered_per_land = pd.DataFrame({'Land': recovered['Land'], 'Sum':recovered.sum(axis=1)})
sum_recovered_per_land.sort_values(by=['Sum'], ascending=True, inplace=True)
bar_plot(sum_recovered_per_land['Sum'], sum_recovered_per_land['Land'], 'Bar plot of the total recovered cases per country')

In [0]:
def find_start_dates(confirmed):
  lands = confirmed['Land']
  start_dates = {}
  for land in lands:
    df1 = confirmed.loc[confirmed['Land']==land].iloc[0,1:]!=0
    start_dates.update({land: df1.index[df1==True].tolist()[0]})
  return start_dates

In [20]:
start_dates = find_start_dates(confirmed)
print(start_dates)

{'Germany': '1/27/20', 'Italy': '1/31/20', 'Spain': '2/1/20', 'China Hubei': '1/22/20', 'Iran': '2/19/20', 'Korea, South': '1/22/20', 'France France': '1/24/20', 'China Guangdong': '1/22/20', 'China Henan': '1/22/20', 'China Zhejiang': '1/22/20'}


In [0]:
def trend_plot(df, title, start_date='1/22/20'):
  lands = df['Land'].values
  num_lands = len(lands)
  fig = go.Figure()
  for i in range(num_lands):
    xdata = df.loc[:, start_date:].columns
    df_land = df.loc[df['Land']==lands[i]]
    ydata = df_land.loc[:, start_date:].values[0]
    fig.add_trace(go.Scatter(
                    x=xdata,
                    y=ydata,
                    name=df['Land'].iloc[i],
                    opacity=0.8))
  fig.update_layout(
      title={
          'text': title,
          'y':0.9,
          'x':0.5,
          'xanchor': 'center',
          'yanchor': 'top'})
  fig.show()

In [22]:
trend_plot(confirmed, 'Confirmed Cases')

In [23]:
trend_plot(deceased, 'Deceased Cases')

In [24]:
trend_plot(recovered, 'Recovered Cases')

In [0]:
def case_speed(df, delta_t=1):
  land_names=df['Land']
  date_cols = df.columns[1:]
  num_days= len(date_cols)

  speed_list = []
  for i in range(0, num_days-delta_t, delta_t):
    speed_list.append(df[date_cols[i+delta_t]].values - df[date_cols[i]].values)
  speed_arr = np.array(speed_list)

  new_date_cols = [date_cols[0]+'-'+date_cols[i] for i in range(1, num_days-delta_t, delta_t)]
  speed_df = pd.DataFrame(columns=new_date_cols)

  for i, ncol in enumerate(new_date_cols):
    speed_df[ncol] = speed_arr[i,:]
  speed_df.insert(0, 'Land', land_names.values)

  return speed_df

In [0]:
def speed_plot(df, title):
  fig = go.Figure()
  for i in range(num_lands):
    fig.add_trace(go.Scatter(
                    x=df.columns[1:],
                    y=df.iloc[i],
                    name=df['Land'].iloc[i],
                    opacity=0.8))
  fig.update_layout(
      title={
          'text': title,
          'y':0.9,
          'x':0.5,
          'xanchor': 'center',
          'yanchor': 'top'})
  fig.show()

In [0]:
delta_t = 3
confirmed_speed = case_speed(df=confirmed, delta_t=delta_t)
deceased_speed = case_speed(df=deceased, delta_t=delta_t)
recovered_speed = case_speed(df=recovered, delta_t=delta_t)

In [28]:
speed_plot(confirmed_speed, 'Speed of Confirmed Cases')

In [29]:
speed_plot(deceased_speed, 'Speed of Deseased Cases')

In [30]:
speed_plot(recovered_speed, 'Speed of Recovered Cases')

# Group level visualization

In [0]:
confirmed_group = confirmed.iloc[:,1:].sum(axis=0)
deceased_group = deceased.iloc[:,1:].sum(axis=0)
recovered_group = recovered.iloc[:,1:].sum(axis=0)

In [0]:
confirmed_group_mean = np.mean(confirmed_group.values)
deceased_group_mean = np.mean(deceased_group.values)
recovered_group_mean = np.mean(recovered_group.values)

In [0]:
confirmed_group = pd.DataFrame(confirmed_group).T
deceased_group = pd.DataFrame(deceased_group).T
recovered_group = pd.DataFrame(recovered_group).T

confirmed_group.insert(0, "Land", 'All-TopQ')
deceased_group.insert(0, "Land", 'All-TopQ')
recovered_group.insert(0, "Land", 'All-TopQ')

In [0]:
delta_t = 3
confirmed_speed_group = case_speed(df=confirmed_group, delta_t=delta_t)
deceased_speed_group = case_speed(df=deceased_group, delta_t=delta_t)
recovered_speed_group = case_speed(df=recovered_group, delta_t=delta_t)

In [0]:
confirmed_speed_group_mean = np.mean(confirmed_speed_group.iloc[:,1:].values)
deceased_speed_group_mean = np.mean(deceased_speed_group.iloc[:,1:].values)
recovered_speed_group_mean = np.mean(recovered_speed_group.iloc[:,1:].values)

In [0]:
def land_plot(confirmed, deceased, recovered, confirmed_speed, deceased_speed, recovered_speed,
              confirmed_mean, deceased_mean, recovered_mean, confirmed_speed_mean, deceased_speed_mean, recovered_speed_mean,
              land='Iran'):
  
  fig = make_subplots(rows=2, cols=1)
  fig.add_trace(go.Scatter(x=confirmed.columns[1:],
                           y=confirmed.iloc[:,1:].values[0],
                           name='Confirmed', opacity=0.8, mode='lines', line = dict(color='royalblue')), row=1, col=1)
  
  fig.add_trace(go.Scatter(x=deceased.columns[1:],
                           y=deceased.iloc[:,1:].values[0],
                           name='Deceased', opacity=0.8, mode='lines', line = dict(color='firebrick')), row=1, col=1)

  fig.add_trace(go.Scatter(x=recovered.columns[1:],
                           y=recovered.iloc[:,1:].values[0],
                           name='Recovered', opacity=0.8, mode='lines', line = dict(color='green')), row=1, col=1)

  fig.add_trace(go.Scatter(x=confirmed_speed.columns[1:],
                           y=confirmed_speed.iloc[:,1:].values[0],
                           name='Confirmed Speed', opacity=0.8, mode='lines', line = dict(color='royalblue')), row=2, col=1)
  
  fig.add_trace(go.Scatter(x=deceased_speed.columns[1:],
                           y=deceased_speed.iloc[:,1:].values[0],
                           name='Deceased Speed', opacity=0.8, mode='lines', line = dict(color='firebrick')), row=2, col=1)
  
  fig.add_trace(go.Scatter(x=recovered_speed.columns[1:],
                           y=recovered_speed.iloc[:,1:].values[0],
                           name='Recovered Speed', opacity=0.8, mode='lines', line = dict(color='green')), row=2, col=1)
  

  #### Mean
  dates = confirmed.columns[1:]
  num_days = len(dates)
  confirmed_mean_vec = confirmed_mean*np.ones(num_days)
  deceased_mean_vec = deceased_mean*np.ones(num_days)
  recovered_mean_vec = recovered_mean*np.ones(num_days)
  confirmed_speed_mean_vec = confirmed_speed_mean*np.ones(num_days)
  deceased_speed_mean_vec = deceased_speed_mean*np.ones(num_days)
  recovered_speed_mean_vec = recovered_speed_mean*np.ones(num_days)

  fig.add_trace(go.Scatter(x=confirmed.columns[1:],
                           y=confirmed_mean_vec,
                           name='Confirmed Mean', opacity=0.8, line = dict(color='royalblue', dash='dash')), row=1, col=1)
  
  fig.add_trace(go.Scatter(x=deceased.columns[1:],
                           y=deceased_mean_vec,
                           name='Deceased Mean', opacity=0.8, line = dict(color='firebrick', dash='dash')), row=1, col=1)

  fig.add_trace(go.Scatter(x=recovered.columns[1:],
                           y=recovered_mean_vec,
                           name='Recovered Meam', opacity=0.8, line = dict(color='green', dash='dash')), row=1, col=1)
  
  fig.add_trace(go.Scatter(x=confirmed_speed.columns[1:],
                           y=confirmed_speed_mean_vec,
                           name='Confirmed Speed Mean', opacity=0.8, line = dict(color='royalblue', dash='dash')), row=2, col=1)
  
  fig.add_trace(go.Scatter(x=deceased_speed.columns[1:],
                           y=deceased_speed_mean_vec,
                           name='Deceased Speed Mean', opacity=0.8, line = dict(color='firebrick', dash='dash')), row=2, col=1)

  fig.add_trace(go.Scatter(x=recovered_speed.columns[1:],
                           y=recovered_speed_mean_vec,
                           name='Recovered Speed Meam', opacity=0.8, line = dict(color='green', dash='dash')), row=2, col=1)
  
  fig.update_layout(title={'text': land, 'y':0.9, 'x':0.5, 'xanchor': 'center', 'yanchor': 'top'})
  fig.show()

In [37]:
land_plot(confirmed_group, deceased_group, recovered_group, confirmed_speed_group, deceased_speed_group, recovered_speed_group, 
          confirmed_group_mean, deceased_group_mean, recovered_group_mean, confirmed_speed_group_mean, deceased_speed_group_mean, recovered_speed_group_mean,
          land='All-TopQ')

# Country-wise visualization

In [0]:
def begin_with_start_time(df, start_times, land='Iran'):
  df_land = df.loc[df['Land']==land].loc[:, start_dates[land]:]
  df_land.insert(0, 'Land', land)
  return df_land

In [0]:
land = 'Iran'

In [0]:
confirmed_since = begin_with_start_time(confirmed, start_dates, land=land)
deceased_since = begin_with_start_time(deceased, start_dates, land=land)
recovered_since = begin_with_start_time(recovered, start_dates, land=land)

In [0]:
confirmed_since_mean = np.mean(confirmed_since.iloc[:,1:].values)
deceased_since_mean = np.mean(deceased_since.iloc[:,1:].values)
recovered_since_mean = np.mean(recovered_since.iloc[:,1:].values)

In [0]:
delta_t = 3
confirmed_speed = case_speed(df=confirmed_since, delta_t=delta_t)
deceased_speed = case_speed(df=deceased_since, delta_t=delta_t)
recovered_speed = case_speed(df=recovered_since, delta_t=delta_t)

In [0]:
confirmed_speed_mean = np.mean(confirmed_speed.iloc[:,1:].values)
deceased_speed_mean = np.mean(deceased_speed.iloc[:,1:].values)
recovered_speed_mean = np.mean(recovered_speed.iloc[:,1:].values)

In [44]:
land_plot(confirmed_since, deceased_since, recovered_since, confirmed_speed, deceased_speed, recovered_speed,
          confirmed_since_mean, deceased_since_mean, recovered_since_mean, confirmed_speed_mean, deceased_speed_mean, recovered_speed_mean,
          land=land)

In [0]:
def mortality_recovery_rates(confirmed1, deceased1, recovered1):
  mortality_rate = pd.DataFrame(deceased1.iloc[:,1:].values / confirmed1.iloc[:,1:].values, columns=confirmed1.columns[1:])
  recovery_rate = pd.DataFrame(recovered1.iloc[:,1:].values / confirmed1.iloc[:,1:].values , columns=confirmed1.columns[1:])

  return mortality_rate, recovery_rate

In [0]:
# for one country
mortality_rate, recovery_rate = mortality_recovery_rates(confirmed_since, deceased_since, recovered_since)

In [0]:
def rate_plot(mortality_rate, recovery_rate, land='Iran'):
  fig = go.Figure()
  fig.add_trace(go.Scatter(x=mortality_rate.columns[1:],
                           y=mortality_rate.iloc[:,1:].values[0],
                           name='Mortality Rate', opacity=0.8, mode='lines', 
                           line = dict(color='firebrick')))
  
  fig.add_trace(go.Scatter(x=recovery_rate.columns[1:],
                           y=recovery_rate.iloc[:,1:].values[0],
                           name='Recovery Rate', opacity=0.8, mode='lines', 
                           line = dict(color='green')))
  fig.update_layout(title={'text': 'Mortality and Recovery Rates in {}'.format(land), 'y':0.9, 'x':0.5, 'xanchor': 'center', 'yanchor': 'top'})
  fig.show()

In [48]:
rate_plot(mortality_rate, recovery_rate)

In [0]:
# for TopQ countries
mortality_rate, recovery_rate = mortality_recovery_rates(confirmed_group, deceased_group, recovered_group)

In [50]:
rate_plot(mortality_rate, recovery_rate, land='TopQ')

# ML analysis

In [0]:
y_train = confirmed_group.iloc[0,1:int(num_days*0.85)].values
y_valid = confirmed_group.iloc[0, int(num_days*0.85):].values

In [0]:
num_train_days = len(y_train)
num_valid_days = len(y_valid)

In [0]:
x_train = np.arange(0, num_train_days).reshape(-1, 1)
x_valid = np.arange(0, num_valid_days).reshape(-1, 1)

In [0]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm
from fbprophet import Prophet
from sklearn.preprocessing import PolynomialFeatures

In [0]:
param_grid = {'kernel':['linear', 'rbf','poly'],
            'C':[0.01, 0.1, 1, 10],
            'gamma':[0.01, 0.1, 1],
            'shrinking':[True, False]}

In [0]:
svm = SVR(degree=3)

In [0]:
svm_search = RandomizedSearchCV(svm, param_grid, cv=5, return_train_score=True)

In [58]:
svm_search.fit(X=x_train, y=np.ravel(y_train))

RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                                 epsilon=0.1, gamma='scale', kernel='rbf',
                                 max_iter=-1, shrinking=True, tol=0.001,
                                 verbose=False),
                   iid='deprecated', n_iter=10, n_jobs=None,
                   param_distributions={'C': [0.01, 0.1, 1, 10],
                                        'gamma': [0.01, 0.1, 1],
                                        'kernel': ['linear', 'rbf', 'poly'],
                                        'shrinking': [True, False]},
                   pre_dispatch='2*n_jobs', random_state=None, refit=True,
                   return_train_score=True, scoring=None, verbose=0)

In [59]:
svm_search.best_estimator_

SVR(C=10, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=1,
    kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [0]:
prediction_valid_svm = svm_search.best_estimator_.predict(x_valid)

In [61]:
print('prediction_valid_svm:\n', prediction_valid_svm)

prediction_valid_svm:
 [24445.12222272 24445.97890675 24451.97569495 24468.25269145
 24499.95000053 24552.20772618 24630.16597253 24738.96484429
 24883.74444515 25069.64487916]


In [62]:
print("Root Mean Square Value:", np.sqrt(mean_squared_error(y_valid,prediction_valid_svm)))

Root Mean Square Value: 136222.18031858854


# XGBoost

In [0]:
import xgboost as xgb
import hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

In [0]:
space={'max_depth': hp.quniform("max_depth", 3, 18, 1),
        'gamma': hp.uniform ('gamma', 1,9),
        'reg_alpha' : hp.quniform('reg_alpha', 40,180,1),
        'reg_lambda' : hp.uniform('reg_lambda', 0,1),
        'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
        'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
        'n_estimators': 180
    }

In [0]:
 #Regression: 
def hyperparameter_tuning(space):
    reg=xgb.XGBRegressor(objective ='reg:squarederror',
                         n_estimators=1000,
                         max_depth = int(space['max_depth']), 
                         gamma = space['gamma'],
                         reg_alpha = int(space['reg_alpha']),
                         min_child_weight=space['min_child_weight'],
                         colsample_bytree=space['colsample_bytree'])
    
    evaluation = [( x_train, y_train), ( x_valid, y_valid)]
    
    reg.fit(x_train, y_train,
            eval_set=evaluation, eval_metric="rmse",
            early_stopping_rounds=10,verbose=False)

    pred = reg.predict(x_valid)
    mse= mean_squared_error(y_valid, pred)
    # print ("SCORE:", mse)
    #change the metric if you like
    return {'loss':mse, 'status': STATUS_OK }

In [66]:
trials = Trials()
best = fmin(fn=hyperparameter_tuning,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)

print (best)

100%|██████████| 100/100 [00:04<00:00, 23.64it/s, best loss: 24536104626.185276]
{'colsample_bytree': 0.5548110583234305, 'gamma': 8.315681614877171, 'max_depth': 16.0, 'min_child_weight': 10.0, 'reg_alpha': 40.0, 'reg_lambda': 0.11177591301529752}


In [67]:
best

{'colsample_bytree': 0.5548110583234305,
 'gamma': 8.315681614877171,
 'max_depth': 16.0,
 'min_child_weight': 10.0,
 'reg_alpha': 40.0,
 'reg_lambda': 0.11177591301529752}

In [0]:
xg_reg = xgb.XGBRegressor(objective ='reg:squarederror', 
                          colsample_bytree = best['colsample_bytree'], 
                          gamma = best['gamma'],
                          learning_rate = 0.1,
                          min_child_weigh=best['min_child_weight'],
                          max_depth = int(best['max_depth']), 
                          reg_alpha = best['reg_alpha'],
                          reg_lambda = best['reg_lambda'],
                          alpha = 10, 
                          n_estimators = 1000)

In [69]:
xg_reg.fit(x_valid, y_valid)

XGBRegressor(alpha=10, base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.5548110583234305,
             gamma=8.315681614877171, importance_type='gain', learning_rate=0.1,
             max_delta_step=0, max_depth=16, min_child_weigh=10.0,
             min_child_weight=1, missing=None, n_estimators=1000, n_jobs=1,
             nthread=None, objective='reg:squarederror', random_state=0,
             reg_alpha=40.0, reg_lambda=0.11177591301529752, scale_pos_weight=1,
             seed=None, silent=None, subsample=1, verbosity=1)

In [0]:
xgb_preds = xg_reg.predict(x_valid)

In [71]:
print("Root Mean Square Value:", np.sqrt(mean_squared_error(y_valid, xgb_preds)))

Root Mean Square Value: 13.590857720328719
