<a href="https://colab.research.google.com/github/Tomawock/MLDM_COVID-19/blob/master/testing/covid_random_forest_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Testing per le Random Forest

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import joblib
from datetime import datetime

from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, StratifiedKFold, KFold
from sklearn.metrics import r2_score, mean_squared_error, max_error
from sklearn.ensemble import RandomForestRegressor 

from sklearn.tree import DecisionTreeRegressor
from sklearn import tree

In [None]:
# CSV exports to evaluate. Judging by the file names there are three variants
# ("medie", "medie_2_week", "medie_ponderate"), each with an "rnd1" and an
# "rnd220" suffix -- NOTE(review): confirm what the suffix encodes.
datasets_path = [
    '/content/dataset_finale_medie_rnd1.csv',
    '/content/dataset_finale_medie_2_week_rnd1.csv',
    '/content/dataset_finale_medie_ponderate_rnd1.csv',
    '/content/dataset_finale_medie_rnd220.csv',
    '/content/dataset_finale_medie_2_week_rnd220.csv',
    '/content/dataset_finale_medie_ponderate_rnd220.csv',
]

# Presumably random seeds for repeated runs; not used by the cells visible here.
rdn_states = [1, 22, 777, 6654, 432145]

# `datasets_path` is a plain list, which has no `.shape` attribute (the original
# call raised AttributeError); report the number of dataset files instead.
print(len(datasets_path))

In [2]:
# Load the dataset CSV, parsing the "Data" column as datetimes.
# NOTE(review): `infer_datetime_format` is deprecated in recent pandas releases;
# confirm against the installed version before removing it.
df_2 = pd.read_csv(
    "/content/dataset_finale_medie_2_week_rnd1.csv",
    parse_dates=["Data"],
    infer_datetime_format=True,
)

# Seed passed to both the train/test sampling and the random forest.
rnd_state = 33

# Names of the columns used as model inputs, one per line.
predictor_columns = [
    'pass_Ammoniaca',
    'pass_Benzene',
    'pass_Biossido di Azoto',
    'pass_Biossido di Zolfo',
    'pass_Monossido di Azoto',
    'pass_Monossido di Carbonio',
    'pass_Ossidi di Azoto',
    'pass_Ozono',
    'pass_PM10 (SM2005)',
    'pass_Particelle sospese PM2.5',
    'pass_Radiazione Globale',
    'pass_Temperatura',
    'pass_deceduti',
    'pass_nuovi_positivi',
    'pass_ricoverati_con_sintomi',
    'pass_tamponi',
]


In [3]:
# Seeded 70/30 split: sample 70% of the rows for training and keep every
# row that was not sampled as the test set.
train_2 = df_2.sample(frac=0.70, random_state=rnd_state)
test_2 = df_2.drop(index=train_2.index)

# Feature matrices restricted to the predictor columns.
train_X_2 = train_2.loc[:, predictor_columns]
test_X_2 = test_2.loc[:, predictor_columns]

# Regression target for each split.
train_y_2 = train_2.loc[:, "fut1_nuovi_positivi"]
test_y_2 = test_2.loc[:, "fut1_nuovi_positivi"]

In [4]:
# Base estimator for the grid search. With bootstrap=False each tree is built
# on the whole training set instead of a bootstrap resample.
forest_regressor = RandomForestRegressor(
    random_state=rnd_state,
    bootstrap=False,
)

In [5]:
# Mean imputer for NaN entries; the per-column means are learned from the
# training features only (fit returns the fitted imputer itself).
imp = SimpleImputer(strategy="mean", missing_values=np.nan).fit(train_X_2)

In [7]:
# Candidate values for each random-forest hyperparameter explored by the search.
# NOTE(review): "mse"/"mae" and "auto" are spellings from older scikit-learn
# releases -- confirm they are still accepted by the installed version.
n_estimators_list = [100]                          # number of trees
criterion_list = ["mse", "mae"]                    # split quality measure
max_depth_list = [None, 5, 6]                      # None = no depth limit
min_samples_split_list = [2, 3]                    # samples needed to split a node
min_samples_split_leaf_list = [3, 5]               # used for "min_samples_leaf" below
min_weight_fraction_leaf_list = [0.0, 0.1]         # weighted fraction required in a leaf
max_features_list = ["auto", "sqrt", "log2"]       # features considered per split
max_leaf_nodes_list = [None, 10, 25]               # None = unlimited leaves
min_impurity_decrease_list = [0.0, 0.1]            # impurity decrease required to split

# Grid handed to GridSearchCV: parameter name -> list of values to try.
param_grd = dict(
    n_estimators=n_estimators_list,
    criterion=criterion_list,
    max_depth=max_depth_list,
    min_samples_split=min_samples_split_list,
    min_samples_leaf=min_samples_split_leaf_list,
    min_weight_fraction_leaf=min_weight_fraction_leaf_list,
    max_features=max_features_list,
    max_leaf_nodes=max_leaf_nodes_list,
    min_impurity_decrease=min_impurity_decrease_list,
)

In [8]:
# Exhaustive search over every combination in param_grd, run on all available
# cores with progress messages; cv and scoring are left at their defaults.
grid_regressor_2 = GridSearchCV(
    estimator=forest_regressor,
    param_grid=param_grd,
    verbose=1,
    n_jobs=-1,
)

In [None]:
# Fill missing training features with the fitted imputer, then run the search.
grid_regressor_2.fit(
    imp.transform(train_X_2),
    train_y_2,
)

Fitting 5 folds for each of 864 candidates, totalling 4320 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-1)]: Done 196 tasks      | elapsed:   20.2s
[Parallel(n_jobs=-1)]: Done 446 tasks      | elapsed:   41.1s
[Parallel(n_jobs=-1)]: Done 796 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 1246 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 1796 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 2446 tasks      | elapsed:  4.0min
[Parallel(n_jobs=-1)]: Done 3196 tasks      | elapsed:  5.7min


In [None]:
# Best model found by the search (GridSearchCV refits it by default).
best_regressor_2 = grid_regressor_2.best_estimator_

# Impute the test features with the imputer fitted on the TRAINING features.
# Fitting a fresh imputer on the test set would fill gaps with test-set means,
# so the model would be scored on data preprocessed differently from the data
# it was trained on, and test-set statistics would leak into the evaluation.
imputer = imp  # alias kept so the `imputer` name stays defined for later cells
test_X_imp = imputer.transform(test_X_2)

# Predictions on the held-out rows.
predicted_2 = best_regressor_2.predict(test_X_imp)

In [None]:
# Hold-out metrics for the tuned forest.
r2_test = r2_score(test_y_2, predicted_2)

# RMSE as sqrt(MSE): for this single-output target it equals
# mean_squared_error(..., squared=False), but does not rely on the `squared`
# keyword, which newer scikit-learn releases deprecate and then remove.
rmse_test = np.sqrt(mean_squared_error(test_y_2, predicted_2))

max_err_test = max_error(test_y_2, predicted_2)

print(f"R2: \t\t{r2_test:.3}")
print(f"RMSE: \t\t{rmse_test:.3}")
print(f"MAX ERR: \t{max_err_test}")