In [1]:
import requests
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score


from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder


In [2]:
r = requests.get("https://api-pc6dbtrtla-uc.a.run.app/API/timeseries/usa")
response_dict = r.json()
dataset1 = pd.DataFrame.from_dict(response_dict)
dataset1 = dataset1.rename(columns={'Total Results as of Date': 'Date'})
Days = (np.array([i for i in range (len(dataset1.Date))]).reshape(-1,1))+1
dataset1['Days'] = Days 

r2 = requests.get("https://api-pc6dbtrtla-uc.a.run.app/API/timeseries/ita")
response_dict2 = r2.json()
dataset2 = pd.DataFrame.from_dict(response_dict2)
dataset2 = dataset2.rename(columns={'Total Results as of Date': 'Date'})
Days = (np.array([i for i in range (len(dataset2.Date))]).reshape(-1,1))+1
dataset2['Days'] = Days 


dataset = pd.concat([dataset1, dataset2])
dataset

Unnamed: 0,ISO3,Country,Date,Cases,Deaths,Recovered,Days
0,USA,US,2020-01-22,1.0,0.0,0.0,1
1,USA,US,2020-01-23,1.0,0.0,0.0,2
2,USA,US,2020-01-24,2.0,0.0,0.0,3
3,USA,US,2020-01-25,2.0,0.0,0.0,4
4,USA,US,2020-01-26,5.0,0.0,0.0,5
...,...,...,...,...,...,...,...
99,ITA,Italy,2020-04-30,205463.0,27967.0,75945.0,100
100,ITA,Italy,2020-05-01,207428.0,28236.0,78249.0,101
101,ITA,Italy,2020-05-02,209328.0,28710.0,79914.0,102
102,ITA,Italy,2020-05-03,210717.0,28884.0,81654.0,103


## -------------------------------- ** ------------------------------------


## Cases

In [3]:
X = dataset[[ "ISO3", "Days" ]].values
y = dataset["Cases"].values

In [4]:
# ## Encoding categorical data

# # 0 for iso3 in the first column

ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [5]:
# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [6]:
regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
regressor.fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=10, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)

In [7]:
## Predicting the Test set results

y_pred = regressor.predict(X_test)


#y_pred = regressor.predict([[1,0, 100]])
#y_pred

In [8]:
## Evaluating the Model Performance - R2 Score
r2_score(y_test, y_pred)


0.9980957479704953

In [9]:
## Evaluating the Model Performance - Mean-squared Error
mean_squared_error(y_test, y_pred)

132146531.51047625

In [10]:
## Applying k-Fold Cross Validation
accuracies = cross_val_score(estimator = regressor, X = X_train, y = y_train, cv = 10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

Accuracy: 99.57 %
Standard Deviation: 0.40 %


## -------------------------------- ** ------------------------------------

## Recovered

In [11]:
X_rec = dataset[[ "ISO3", "Days"]].values
y_rec = dataset["Recovered"].values

In [12]:
# ## Encoding categorical data

# # 0 for iso3 in the first column

ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0])], remainder='passthrough')
X_rec = np.array(ct.fit_transform(X_rec))

In [13]:
# Splitting the dataset into the Training set and Test set
X_rec_train, X_rec_test, y_rec_train, y_rec_test = train_test_split(X_rec, y_rec, test_size = 0.2, random_state = 0)

In [14]:
regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
regressor.fit(X_rec_train, y_rec_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=10, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)

In [15]:
## Predicting the Test set results

y_pred_rec = regressor.predict(X_rec_test)


#y_pred_rec = regressor.predict([[1,0, 100]])
#y_pred_rec

In [16]:
## Evaluating the Model Performance - R2 Score
r2_score(y_rec_test, y_pred_rec)


0.99600703699641

In [17]:
## Applying k-Fold Cross Validation
accuracies = cross_val_score(estimator = regressor, X = X_rec_train, y = y_rec_train, cv = 10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

Accuracy: 98.42 %
Standard Deviation: 1.34 %


## -------------------------------- ** ------------------------------------


## Deaths

In [18]:
X_death = dataset[[ "ISO3", "Days"]].values
y_death = dataset["Recovered"].values

In [19]:
# ## Encoding categorical data

# # 0 for iso3 in the first column

ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0])], remainder='passthrough')
X_death = np.array(ct.fit_transform(X_death))


In [20]:
# Splitting the dataset into the Training set and Test set
X_death_train, X_death_test, y_death_train, y_death_test = train_test_split(X_death, y_death, test_size = 0.2, random_state = 0)

In [21]:
regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
regressor.fit(X_death_train, y_death_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=10, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)

In [22]:
## Predicting the Test set results

y_pred_death = regressor.predict(X_death_test)

# y_pred_death = regressor.predict([[1,0, 100]])
# y_pred_death

In [23]:
## Evaluating the Model Performance - R2 Score
r2_score(y_death_test, y_pred_death)


0.99600703699641

In [24]:
## Applying k-Fold Cross Validation
accuracies = cross_val_score(estimator = regressor, X = X_death_train, y = y_death_train, cv = 10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

Accuracy: 98.42 %
Standard Deviation: 1.34 %
