In [36]:
import numpy as np 
import pandas as pd 

from fbprophet import Prophet
from fbprophet.plot import plot_plotly
from fbprophet.plot import add_changepoints_to_plot

import plotly.offline as py
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

from sklearn.metrics import mean_absolute_error

import itertools
from statsmodels.tsa.arima_model import ARIMA

from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeRegressor

import warnings
warnings.simplefilter('ignore')

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/coronavirus-turkey/Coronavirus Turkey.csv


# **LOAD DATASETS**

In [37]:
data = pd.read_csv("../input/coronavirus-turkey/Coronavirus Turkey.csv")
data.Date = pd.to_datetime(data.Date)
data.head()


Unnamed: 0,Date,Total Cases,Total Deaths,Total Recovered,Total Active,Daily Cases,Daily Test,Total Intensive Care,Intubated Cases,Day
0,2020-03-11,1,0,0,1,1,0,0,0,1
1,2020-03-12,1,0,0,1,0,0,0,0,2
2,2020-03-13,5,0,0,5,4,0,0,0,3
3,2020-03-14,6,0,0,6,1,0,0,0,4
4,2020-03-15,18,0,0,18,12,0,0,0,5


In [38]:
case = data[["Date","Total Cases"]]
deaths = data[["Date","Total Deaths"]]
recoveries = data[["Date","Total Recovered"]]

In [39]:
case.columns = ['ds', 'y']
deaths.columns = ['ds', 'y']
recoveries.columns = ['ds', 'y']

In [40]:
f_date = pd.to_datetime('2020-03-11')
s_date = pd.to_datetime('2020-04-29')
e_date = pd.to_datetime('2020-05-13')
l_date = pd.to_datetime('2020-05-27')

In [41]:
# CASE

case_df = case.copy()
case_df.ds = pd.to_datetime(case_df.ds)

C_train = case_df.loc[(case_df['ds'] >= f_date) & (case_df['ds'] <= s_date)]
C_test = case_df.loc[(case_df['ds'] >= s_date) & (case_df['ds'] <= e_date)]
C_result = C_test.copy()

# DEATHS

deaths_df = deaths.copy()
deaths_df.ds = pd.to_datetime(deaths_df.ds)

D_train = deaths_df.loc[(deaths_df['ds'] >= f_date) & (deaths_df['ds'] <= s_date)]
D_test = deaths_df.loc[(deaths_df['ds'] >= s_date) & (deaths_df['ds'] <= e_date)]
D_result = D_test.copy()


# RECOVERIES

recoveries_df = recoveries.copy()
recoveries_df.ds = pd.to_datetime(recoveries_df.ds)

R_train = recoveries_df.loc[(recoveries_df['ds'] >= f_date) & (recoveries_df['ds'] <= s_date)]
R_test = recoveries_df.loc[(recoveries_df['ds'] >= s_date) & (recoveries_df['ds'] <= e_date)]
R_result = R_test.copy()

In [42]:
print(len(R_train))

50


# **PROPHET FORECASTING**

In [43]:
C = ['2020-03-15','2020-03-18','2020-03-22','2020-03-26','2020-03-30','2020-04-02','2020-04-06',
     '2020-04-10','2020-04-14','2020-04-17','2020-04-21','2020-04-25','2020-04-28']

D = ['2020-03-18','2020-03-22','2020-03-26','2020-03-29','2020-04-02','2020-04-06',
     '2020-04-10','2020-04-14','2020-04-25','2020-04-29']

R = ['2020-03-22','2020-03-30','2020-04-02','2020-04-06','2020-04-10',
     '2020-04-14','2020-04-17','2020-04-19','2020-04-21','2020-04-25']


Model_C = Prophet(changepoint_prior_scale=.5, changepoints = C) 
Model_C .fit(C_train )

Model_D = Prophet(changepoint_prior_scale=.5, changepoints = D) 
Model_D .fit(D_train )

Model_R = Prophet(changepoint_prior_scale=.9, changepoints = R) 
Model_R .fit(R_train )

<fbprophet.forecaster.Prophet at 0x7f9b663c1350>

In [44]:
Future_C = Model_C.make_future_dataframe(periods=len(C_train))
Future_D = Model_D.make_future_dataframe(periods=len(D_train))
Future_R = Model_R.make_future_dataframe(periods=len(D_train))

Forecast_C = Model_C.predict(Future_C)
Forecast_D = Model_D.predict(Future_D)
Forecast_R = Model_R.predict(Future_R)

In [45]:
C_valitate = Forecast_C.loc[(Forecast_C['ds'] >= s_date) & (Forecast_C['ds'] <= e_date)]
C_result["PROPHET"] = C_valitate['yhat'].values

D_valitate = Forecast_D.loc[(Forecast_D['ds'] >= s_date) & (Forecast_D['ds'] <= e_date)]
D_result["PROPHET"] = D_valitate['yhat'].values

R_valitate = Forecast_R.loc[(Forecast_R['ds'] >= s_date) & (Forecast_R['ds'] <= e_date)]
R_result["PROPHET"] = R_valitate['yhat'].values

# **ARIMA FORECAST**

In [46]:
C_train = np.array(C_train.y, dtype=np.float64)
C_test = np.array(C_test.y, dtype=np.float64)

D_train = np.array(D_train.y, dtype=np.float64)
D_test = np.array(D_test.y, dtype=np.float64)

R_train = np.array(R_train.y, dtype=np.float64)
R_test = np.array(R_test.y, dtype=np.float64)

In [47]:
p=d=q=range(0,4)
a=99999
pdq=list(itertools.product(p,d,q))
    
#Determining the best parameters
for var in pdq:
    try:
        model = ARIMA(C_train, order=var)
        result = model.fit()

        if (result.aic<=a) :
            a=result.aic
            param=var
    except:
        continue
        

model = ARIMA(C_train, order=param)
result = model.fit()
C_prediction=result.forecast(steps=len(C_test))[0]
C_result['ARIMA'] = C_prediction

print(result.summary()) 

                             ARIMA Model Results                              
Dep. Variable:                   D2.y   No. Observations:                   48
Model:                 ARIMA(3, 2, 2)   Log Likelihood                -355.038
Method:                       css-mle   S.D. of innovations            375.131
Date:                Fri, 15 May 2020   AIC                            724.076
Time:                        21:02:35   BIC                            737.174
Sample:                             2   HQIC                           729.026
                                                                              
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         64.4851     58.875      1.095      0.273     -50.909     179.879
ar.L1.D2.y    -0.9367      0.151     -6.185      0.000      -1.234      -0.640
ar.L2.D2.y    -0.7678      0.165     -4.640      0.0

In [48]:
p=d=q=range(0,4)
a=99999
pdq=list(itertools.product(p,d,q))
    
#Determining the best parameters
for var in pdq:
    try:
        model = ARIMA(D_train, order=var)
        result = model.fit()

        if (result.aic<=a) :
            a=result.aic
            param=var
    except:
        continue
        

model = ARIMA(D_train, order=param)
result = model.fit()
D_prediction=result.forecast(steps=len(C_test))[0]
D_result['ARIMA'] = D_prediction
print(result.summary()) 

                             ARIMA Model Results                              
Dep. Variable:                   D2.y   No. Observations:                   48
Model:                 ARIMA(1, 2, 0)   Log Likelihood                -150.888
Method:                       css-mle   S.D. of innovations              5.603
Date:                Fri, 15 May 2020   AIC                            307.776
Time:                        21:02:41   BIC                            313.389
Sample:                             2   HQIC                           309.897
                                                                              
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.7831      1.216      1.466      0.143      -0.601       4.167
ar.L1.D2.y     0.3419      0.135      2.538      0.011       0.078       0.606
                                    Roots           

In [49]:
p=d=q=range(0,4)
a=99999
pdq=list(itertools.product(p,d,q))
    
#Determining the best parameters
for var in pdq:
    try:
        model = ARIMA(R_train, order=var)
        result = model.fit()

        if (result.aic<=a) :
            a=result.aic
            param=var
    except:
        continue
        

model = ARIMA(R_train, order=param)
result = model.fit()
R_prediction=result.forecast(steps=len(C_test))[0]
R_result['ARIMA'] = R_prediction
print(result.summary()) 

                             ARIMA Model Results                              
Dep. Variable:                   D2.y   No. Observations:                   48
Model:                 ARIMA(0, 2, 3)   Log Likelihood                -334.828
Method:                       css-mle   S.D. of innovations            257.550
Date:                Fri, 15 May 2020   AIC                            679.655
Time:                        21:02:45   BIC                            689.011
Sample:                             2   HQIC                           683.191
                                                                              
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        117.6693     63.788      1.845      0.065      -7.353     242.692
ma.L1.D2.y     0.1988      0.148      1.344      0.179      -0.091       0.489
ma.L2.D2.y     0.1579      0.176      0.898      0.3

# **DECISION TREE REGRESSION**

In [50]:
def preprocessing(X, method):

    # scale, return[0-1]
    if method == 'MinMaxScaler':
        X_new = MinMaxScaler().fit_transform(X)

    return X_new

In [51]:
C_data = data[["Total Cases"]]
C_data = C_data.rename(columns={"Total Cases": "t"})

C_data["t-1"] = C_data["t"].shift(periods=1, fill_value=0)
C_data["t-2"] = C_data["t"].shift(periods=2, fill_value=0)
C_data["t-3"] = C_data["t"].shift(periods=3, fill_value=0)
C_data["t-4"] = C_data["t"].shift(periods=4, fill_value=0)
C_data["t-5"] = C_data["t"].shift(periods=5, fill_value=0)
C_data["t-6"] = C_data["t"].shift(periods=6, fill_value=0)
C_data["t-7"] = C_data["t"].shift(periods=7, fill_value=0)

C_data = C_data.replace([np.inf, -np.inf], np.nan)
C_data = C_data.dropna()

c_train = C_data.iloc[0:51]
c_test = C_data.iloc[51:]

columns = ["t-1","t-2","t-3","t-4","t-5","t-6","t-7"]

X_c_train = c_train[columns].values
y_c_train = c_train["t"].values

X_c_test = c_test[columns].values
y_c_test = c_test["t"].values


X_c_train = preprocessing(X_c_train, 'MinMaxScaler')
X_c_test = preprocessing(X_c_test, 'MinMaxScaler')

dt = DecisionTreeRegressor()
dt.fit(X_c_train, y_c_train)
pred = dt.predict(X_c_test)
C_result['DT'] = pred

In [52]:
D_data = data[["Total Deaths"]]
D_data = D_data.rename(columns={"Total Deaths": "t"})

D_data["t-1"] = D_data["t"].shift(periods=1, fill_value=0)
D_data["t-2"] = D_data["t"].shift(periods=2, fill_value=0)
D_data["t-3"] = D_data["t"].shift(periods=3, fill_value=0)
D_data["t-4"] = D_data["t"].shift(periods=4, fill_value=0)
D_data["t-5"] = D_data["t"].shift(periods=5, fill_value=0)
D_data["t-6"] = D_data["t"].shift(periods=6, fill_value=0)
D_data["t-7"] = D_data["t"].shift(periods=7, fill_value=0)

D_data = D_data.replace([np.inf, -np.inf], np.nan)
D_data = D_data.dropna()

d_train = D_data.iloc[0:51]
d_test = D_data.iloc[51:]

columns = ["t-1","t-2","t-3","t-4","t-5","t-6","t-7"]

X_d_train = d_train[columns].values
y_d_train = d_train["t"].values

X_d_test = d_test[columns].values
y_d_test = d_test["t"].values


X_d_train = preprocessing(X_d_train, 'MinMaxScaler')
X_d_test = preprocessing(X_d_test, 'MinMaxScaler')

dt = DecisionTreeRegressor()
dt.fit(X_d_train, y_d_train)
pred = dt.predict(X_d_test)
D_result['DT'] = pred


In [53]:
R_data = data[["Total Recovered"]]
R_data = R_data.rename(columns={"Total Recovered": "t"})

R_data["t-1"] = R_data["t"].shift(periods=1, fill_value=0)
R_data["t-2"] = R_data["t"].shift(periods=2, fill_value=0)
R_data["t-3"] = R_data["t"].shift(periods=3, fill_value=0)
R_data["t-4"] = R_data["t"].shift(periods=4, fill_value=0)
R_data["t-5"] = R_data["t"].shift(periods=5, fill_value=0)
R_data["t-6"] = R_data["t"].shift(periods=6, fill_value=0)
R_data["t-7"] = R_data["t"].shift(periods=7, fill_value=0)

R_data = R_data.replace([np.inf, -np.inf], np.nan)
R_data = R_data.dropna()

r_train = R_data.iloc[0:51]
r_test = R_data.iloc[51:]

columns = ["t-1","t-2","t-3","t-4","t-5","t-6","t-7"]

X_r_train = r_train[columns].values
y_r_train = r_train["t"].values

X_r_test = r_test[columns].values
y_r_test = r_test["t"].values


X_r_train = preprocessing(X_r_train, 'MinMaxScaler')
X_r_test = preprocessing(X_r_test, 'MinMaxScaler')

dt = DecisionTreeRegressor()
dt.fit(X_r_train, y_r_train)
pred = dt.predict(X_r_test)
R_result['DT'] = pred

# **RESULT**

In [54]:
print(" ********** CASE **********")
print("#### PROPHET ####")
MAPE = np.mean(abs((C_result['y'] - C_result['PROPHET']) / C_result['y'])) * 100
MAE = mean_absolute_error(C_result['y'], C_result['PROPHET'])
print("MAE : ", MAE)
print("MAPE : ", MAPE)
print("#### ARIMA ####")
MAPE = np.mean(abs((C_result['y'] - C_result['ARIMA']) / C_result['y'])) * 100
MAE = mean_absolute_error(C_result['y'], C_result['ARIMA'])
print("MAE : ", MAE)
print("MAPE : ", MAPE)
print("#### DT ####")
MAPE = np.mean(abs((C_result['y'] - C_result['DT']) / C_result['y'])) * 100
MAE = mean_absolute_error(C_result['y'], C_result['DT'])
print("MAE : ", MAE)
print("MAPE : ", MAPE)
print("\n")


print(" ********** DEATHS **********")
print("#### PROPHET ####")
MAPE = np.mean(abs((D_result['y'] - D_result['PROPHET']) / D_result['y'])) * 100
MAE = mean_absolute_error(D_result['y'], D_result['PROPHET'])
print("MAE : ", MAE)
print("MAPE : ", MAPE)
print("#### ARIMA ####")
MAPE = np.mean(abs((D_result['y'] - D_result['ARIMA']) / D_result['y'])) * 100
MAE = mean_absolute_error(D_result['y'], D_result['ARIMA'])
print("MAE : ", MAE)
print("MAPE : ", MAPE)
print("#### DT ####")
MAPE = np.mean(abs((D_result['y'] - D_result['DT']) / D_result['y'])) * 100
MAE = mean_absolute_error(D_result['y'], D_result['DT'])
print("MAE : ", MAE)
print("MAPE : ", MAPE)
print("\n")

print(" ********** RECOVERIES **********")
print("#### PROPHET ####")
MAPE = np.mean(abs((R_result['y'] - R_result['PROPHET']) / R_result['y'])) * 100
MAE = mean_absolute_error(R_result['y'], R_result['PROPHET'])
print("MAE : ", MAE)
print("MAPE : ", MAPE)
print("#### ARIMA ####")
MAPE = np.mean(abs((R_result['y'] - R_result['ARIMA']) / R_result['y'])) * 100
MAE = mean_absolute_error(R_result['y'], R_result['ARIMA'])
print("MAE : ", MAE)
print("MAPE : ", MAPE)
print("#### DT ####")
MAPE = np.mean(abs((R_result['y'] - R_result['DT']) / R_result['y'])) * 100
MAE = mean_absolute_error(R_result['y'], R_result['DT'])
print("MAE : ", MAE)
print("MAPE : ", MAPE)
print("\n")

 ********** CASE **********
#### PROPHET ####
MAE :  7241.258483907102
MAPE :  5.308778584846607
#### ARIMA ####
MAE :  13908.709456962097
MAPE :  10.282216237270841
#### DT ####
MAE :  58071.0
MAPE :  45.60275655250662


 ********** DEATHS **********
#### PROPHET ####
MAE :  150.60045603993615
MAPE :  4.0009156086678415
#### ARIMA ####
MAE :  298.33177250038875
MAPE :  8.068339085680835
#### DT ####
MAE :  1610.657142857143
MAPE :  47.00648241424513


 ********** RECOVERIES **********
#### PROPHET ####
MAE :  2218.3968823859955
MAPE :  2.4582559249384186
#### ARIMA ####
MAE :  19463.38116636523
MAPE :  23.701780132627874
#### DT ####
MAE :  48812.48444444444
MAPE :  67.91644858617609


