## **Predicting Disease Spread: Model for San Juan**

### **Imports**

In [1]:
import sys
import os

import numpy as np
import pandas as pd

sys.path.append(os.path.abspath(".."))
from src.data_setup import load_data, split_cities
from src.model import (
    ModelTrainer,
    supporting_model
)

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor


### **Data**

In [2]:
# Load the training split, then separate features and labels per city.
# Each split_cities result is unpacked as (sj_*, iq_*), features first, labels second.
features_train, labels_train = load_data(train=True)
(sj_features_train, iq_features_train), (sj_labels_train, iq_labels_train) = map(
    split_cities, (features_train, labels_train)
)

In [3]:
# Load the test split and separate its features per city.
# NOTE(review): labels_test is not referenced by any other visible cell — confirm it is needed.
features_test, labels_test = load_data(train=False)
sj_features_test, iq_features_test = split_cities(features_test)

#### Drop the only non-numeric (and irrelevant) column

In [4]:
# Remove the date column from both San Juan feature frames.
sj_features_train_num = sj_features_train.drop('week_start_date', axis='columns')
sj_features_test_num = sj_features_test.drop('week_start_date', axis='columns')

# Test features read from a file prepared ahead of time.
# NOTE(review): presumably this file holds the test features extended with the
# three previous-case columns — confirm how and where it is generated.
sj_features_test_added_prev_3 = pd.read_csv("../data/sj_features_test_added_prev_3.csv")

### **Prev cases as predictions of another model**

#### Best supporting model choice 

In [5]:
# Candidate regressors for the supporting-model comparison.
# The estimators that draw random numbers while fitting (random forest,
# gradient boosting, MLP) get a fixed seed so that re-running the notebook
# reproduces the same scores. KNN, SVR and ElasticNet (with its default
# coordinate selection) are left as they were.
knn = KNeighborsRegressor()
rf = RandomForestRegressor(random_state=42)
en = ElasticNet()
svr = SVR()
boosting = GradientBoostingRegressor(random_state=42)
mlp = MLPRegressor(random_state=42)

In [6]:
# Hyperparameter search spaces, one per candidate estimator.
# Every grid maps a constructor argument name to the list of values to try.

# KNeighborsRegressor
PARAM_GRID_KNN = dict(
    n_neighbors=[3, 5, 7, 9, 13, 17],
    weights=['uniform', 'distance'],
    p=[1, 2],
)

# RandomForestRegressor
PARAM_GRID_RF = dict(
    n_estimators=[500],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# GradientBoostingRegressor
PARAM_GRID_BOOSTING = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 5, 7],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# ElasticNet
PARAM_GRID_EN = dict(
    alpha=[0.01, 0.1, 1.0, 10.0, 100.0],
    l1_ratio=[0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99],
)

# SVR
PARAM_GRID_SVR = dict(
    C=[0.1, 1, 10, 100],
    epsilon=[0.01, 0.1, 0.2, 0.5],
    kernel=['linear', 'rbf', 'poly'],
)

# MLPRegressor
PARAM_GRID_MLP = dict(
    hidden_layer_sizes=[(50,), (100,), (50, 50), (100, 50)],
    activation=['relu', 'tanh'],
    solver=['adam', 'sgd'],
    learning_rate=['constant', 'adaptive'],
    alpha=[0.0001, 0.001, 0.01],
)

In [17]:
# Shared settings passed to every ModelTrainer in the "Models" section.
SCALER = "standard"            # -> scaling_method
IMPUTATION_METHOD = "linear"   # -> imputation_method
CORR_METHOD = "pearson"        # -> corr_method
TOP_N_FEATURES = 10            # -> top_n_features
K_PREV = 3                     # -> k_prev_targets

In [None]:
import warnings
from sklearn.exceptions import ConvergenceWarning

# Silence only the convergence warnings expected from iterative estimators.
# A blanket filterwarnings('ignore') would hide every other warning for the
# rest of the session and would make this category filter redundant.
warnings.filterwarnings('ignore', category=ConvergenceWarning)

# Compare the candidate estimators on the San Juan data. The parameter grids
# are listed in the same order as the estimators they belong to.
# The returned object is indexed with "X_test" in a later cell.
supp = supporting_model(
    sj_features_train_num,
    sj_labels_train,
    sj_features_test_num,
    [knn, rf, en, svr, boosting, mlp],
    [PARAM_GRID_KNN, PARAM_GRID_RF, PARAM_GRID_EN, PARAM_GRID_SVR, PARAM_GRID_BOOSTING, PARAM_GRID_MLP],
)

Summary of the supporting-model comparison (the model with the lowest score is in bold):

| Model                     | Score     |
|---------------------------|-----------|
| KNeighborsRegressor       | 29.9257  |
| RandomForestRegressor     | 30.0056  |
| ElasticNet                | 28.4897  |
| **SVR**                   | 24.0892  |
| GradientBoostingRegressor | 28.7210  |
| MLPRegressor              | 25.8663  |

In [37]:
# Take the "X_test" entry of the supporting-model result.
# NOTE(review): the model cells below pass sj_features_test_added_prev_3 (read from CSV),
# not this variable — confirm which of the two is intended.
sj_features_test_added_prev = supp["X_test"]

In [39]:
# Show the first row to check that the *_prev_cases columns are present.
sj_features_test_added_prev.head(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,ndvi_ne,ndvi_nw,ndvi_se,ndvi_sw,precipitation_amt_mm,reanalysis_air_temp_k,reanalysis_avg_temp_k,reanalysis_dew_point_temp_k,reanalysis_max_air_temp_k,reanalysis_min_air_temp_k,...,ndvi_ne_ndvi_nw_sum,ndvi_ne_ndvi_se_diff,ndvi_ne_ndvi_se_prod,ndvi_ne_ndvi_se_quot,ndvi_ne_ndvi_se_sum,ndvi_ne_ndvi_sw_diff,ndvi_ne_ndvi_sw_prod,1_prev_cases,2_prev_cases,3_prev_cases
year,weekofyear,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2008,18,-0.0189,-0.0189,0.102729,0.0912,78.6,298.492857,298.55,294.527143,301.1,296.4,...,-0.0378,-0.121629,-0.001942,-0.18398,0.083829,-0.1101,-0.001724,5.0,3.0,1.0


#### Models

In [24]:
# KNN trainer configured with the shared preprocessing settings and the KNN grid.
model_trainer_knn = ModelTrainer(
    model=KNeighborsRegressor(),
    imputation_method=IMPUTATION_METHOD,
    top_n_features=TOP_N_FEATURES,
    corr_method=CORR_METHOD,
    scaling_method=SCALER,
    k_prev_targets=K_PREV,
    param_grid=PARAM_GRID_KNN,
)

# Running this cell prints the preprocessing/tuning log and the CV results.
model_trainer_knn.fit(sj_features_train_num, sj_labels_train)
# NOTE(review): what transform returns is not visible here — see src.model.
model_trainer_knn.transform(
    sj_features_train_num,
    sj_labels_train,
    sj_features_test_added_prev_3,
)

--- Model KNeighborsRegressor ---
Preprocessing started.
Preprocessing finished.
Training model.
Tunning model's hyperparameters.
CV results: -17.9982,-17.9026,-17.3350,-17.2672,-17.8813,-17.7516,-17.2124,-17.1055,-17.8219,-17.7152,-17.1716,-17.0538,-17.9932,-17.8670,-17.3402,-17.1986,-18.3461,-18.1948,-17.5032,-17.3395,-18.6343,-18.4741,-17.9710,-17.7814
Mean: -17.7025




In [29]:
# Gradient-boosting trainer configured with the shared preprocessing settings.
# The estimator gets a fixed seed so that a re-run reproduces the same fit;
# without it every execution of this cell can report different CV scores.
model_trainer_boosting = ModelTrainer(
    model=GradientBoostingRegressor(random_state=42),
    imputation_method=IMPUTATION_METHOD,
    top_n_features=TOP_N_FEATURES,
    corr_method=CORR_METHOD,
    scaling_method=SCALER,
    k_prev_targets=K_PREV,
    param_grid=PARAM_GRID_BOOSTING,
)

# Running this cell prints the preprocessing/tuning log and the CV results.
model_trainer_boosting.fit(sj_features_train_num, sj_labels_train)
# NOTE(review): what transform returns is not visible here — see src.model.
model_trainer_boosting.transform(
    sj_features_train_num,
    sj_labels_train,
    sj_features_test_added_prev_3,
)

--- Model GradientBoostingRegressor ---
Preprocessing started.
Preprocessing finished.
Training model.
Tunning model's hyperparameters.
CV results: -14.0675,-13.9734,-9.1260,-9.4617,-9.3319,-9.6191,-9.6079,-9.0905,-14.1598,-9.3050,-13.4086,-9.4471,-9.6072,-9.7587,-9.5465,-9.4892,-9.3282,-13.9668,-13.8156,-9.3191,-13.2491,-10.1752,-9.5556,-13.5607,-9.6045,-10.0154,-10.2360,-10.0352,-9.2427,-9.2499,-9.3084,-9.8865,-10.1814,-13.4838,-14.0679,-9.2194,-9.1948,-9.3292,-9.8848,-9.4667,-13.6887,-9.0840,-9.3537,-10.1108,-9.3957,-10.0915,-9.6986,-9.1943,-9.4982,-9.0592,-9.4505,-9.2943,-9.2197,-9.3617,-10.4539,-9.6123,-9.6582,-9.7179,-9.3670,-9.4259,-9.2710,-9.2886,-9.1585,-9.5169,-9.3913,-9.1107,-13.5193,-9.2230,-9.6198,-9.5404,-9.6410,-9.5489,-10.0221,-9.3939,-9.0847,-9.0247,-9.4458,-9.3255,-9.9833,-9.0273,-9.3694,-9.6207,-9.4731,-9.5252,-9.1894,-9.3483,-10.0169,-14.1729,-8.8452,-10.0880,-13.2733,-9.6222,-9.1241,-9.7507,-13.4610,-9.4519,-9.6572,-9.3831,-10.0298,-9.1245
Mean: -10.1378


In [30]:
# Random-forest trainer configured with the shared preprocessing settings.
# The estimator gets a fixed seed so that a re-run reproduces the same fit;
# without it every execution of this cell can report different CV scores.
model_trainer_rf = ModelTrainer(
    model=RandomForestRegressor(random_state=42),
    imputation_method=IMPUTATION_METHOD,
    top_n_features=TOP_N_FEATURES,
    corr_method=CORR_METHOD,
    scaling_method=SCALER,
    k_prev_targets=K_PREV,
    param_grid=PARAM_GRID_RF,
)

# Running this cell prints the preprocessing/tuning log and the CV results.
model_trainer_rf.fit(sj_features_train_num, sj_labels_train)
# NOTE(review): what transform returns is not visible here — see src.model.
model_trainer_rf.transform(
    sj_features_train_num,
    sj_labels_train,
    sj_features_test_added_prev_3,
)

--- Model RandomForestRegressor ---
Preprocessing started.
Preprocessing finished.
Training model.
Tunning model's hyperparameters.




CV results: -9.0245,-8.9276,-8.8988,-8.9706,-8.9012,-8.8600,-9.0739,-9.0664,-9.0690,-8.9714,-8.9119,-8.8804,-8.9792,-8.9147,-8.8964,-9.0502,-9.0218,-9.0661,-9.0092,-8.9770,-8.8901,-8.9887,-8.9706,-8.9025,-9.0431,-9.0463,-9.0476,-9.0384,-8.9023,-8.8709,-8.9304,-8.9167,-8.8789,-9.0623,-9.0657,-9.1026
Mean: -8.9758


In [31]:
# ElasticNet trainer configured with the shared preprocessing settings and the ElasticNet grid.
model_trainer_en = ModelTrainer(
    model=ElasticNet(),
    imputation_method=IMPUTATION_METHOD,
    top_n_features=TOP_N_FEATURES,
    corr_method=CORR_METHOD,
    scaling_method=SCALER,
    k_prev_targets=K_PREV,
    param_grid=PARAM_GRID_EN,
)

# Running this cell prints the preprocessing/tuning log and the CV results.
model_trainer_en.fit(sj_features_train_num, sj_labels_train)
# NOTE(review): what transform returns is not visible here — see src.model.
model_trainer_en.transform(
    sj_features_train_num,
    sj_labels_train,
    sj_features_test_added_prev_3,
)

--- Model ElasticNet ---
Preprocessing started.
Preprocessing finished.
Training model.
Tunning model's hyperparameters.




CV results: -8.3428,-8.3434,-8.3461,-8.3538,-8.3679,-8.3723,-8.3763,-8.6970,-8.5687,-8.4375,-8.3099,-8.2228,-8.2068,-8.1985,-11.9089,-11.2275,-10.4523,-9.6102,-8.5440,-8.2436,-7.9840,-22.8746,-22.1219,-21.0098,-19.2000,-15.4780,-13.8210,-11.7666,-28.2909,-28.5177,-28.8072,-28.8857,-28.8857,-28.8857,-28.8857
Mean: -14.6441


In [32]:
# SVR trainer configured with the shared preprocessing settings and the SVR grid.
model_trainer_svr = ModelTrainer(
    model=SVR(),
    imputation_method=IMPUTATION_METHOD,
    top_n_features=TOP_N_FEATURES,
    corr_method=CORR_METHOD,
    scaling_method=SCALER,
    k_prev_targets=K_PREV,
    param_grid=PARAM_GRID_SVR,
)

# Running this cell prints the preprocessing/tuning log and the CV results.
model_trainer_svr.fit(sj_features_train_num, sj_labels_train)
# NOTE(review): what transform returns is not visible here — see src.model.
model_trainer_svr.transform(
    sj_features_train_num,
    sj_labels_train,
    sj_features_test_added_prev_3,
)

--- Model SVR ---
Preprocessing started.
Preprocessing finished.
Training model.
Tunning model's hyperparameters.




CV results: -9.8419,-24.9646,-24.5348,-9.8406,-24.9543,-24.5459,-9.8277,-24.9573,-24.5483,-9.8145,-24.9617,-24.5244,-8.2594,-21.2066,-20.0259,-8.2578,-21.2064,-20.0230,-8.2519,-21.2010,-20.0198,-8.2395,-21.1916,-20.0397,-8.0717,-13.9848,-16.5667,-8.0731,-13.9824,-16.5568,-8.0743,-13.9826,-16.5466,-8.0634,-13.9841,-16.5042,-8.0603,-12.3407,-19.4226,-8.0603,-12.3343,-19.4249,-8.0575,-12.3247,-19.4223,-8.0551,-12.2971,-19.4092
Mean: -15.6008


In [33]:
import warnings
from sklearn.exceptions import ConvergenceWarning

# Silence only the convergence warnings expected while the MLP grid is searched.
# A blanket filterwarnings('ignore') would hide every other warning for the
# rest of the session and would make this category filter redundant.
warnings.filterwarnings('ignore', category=ConvergenceWarning)

# MLP trainer configured with the shared preprocessing settings.
# The estimator gets a fixed seed so that weight initialisation, and therefore
# the reported CV scores, are reproducible across runs.
model_trainer_mlp = ModelTrainer(
    model=MLPRegressor(random_state=42),
    imputation_method=IMPUTATION_METHOD,
    top_n_features=TOP_N_FEATURES,
    corr_method=CORR_METHOD,
    scaling_method=SCALER,
    k_prev_targets=K_PREV,
    param_grid=PARAM_GRID_MLP,
)

# Running this cell prints the preprocessing/tuning log and the CV results.
model_trainer_mlp.fit(sj_features_train_num, sj_labels_train)
# NOTE(review): what transform returns is not visible here — see src.model.
model_trainer_mlp.transform(
    sj_features_train_num,
    sj_labels_train,
    sj_features_test_added_prev_3,
)

--- Model MLPRegressor ---
Preprocessing started.
Preprocessing finished.
Training model.
Tunning model's hyperparameters.




CV results: -12.3059,-9.6979,-12.2848,-9.8045,-11.6916,-9.6687,-11.5740,-9.6825,-10.1979,-10.5175,-10.1638,-10.0464,-10.0575,-10.4807,-9.8586,-9.9918,-12.5056,-9.7161,-12.4311,-9.5044,-11.8473,-9.7311,-11.6271,-9.8999,-9.8594,-9.8267,-10.2691,-9.7325,-9.8234,-10.3610,-10.0928,-10.1066,-12.4090,-9.7098,-12.2756,-9.7095,-11.7680,-9.8090,-11.6507,-9.7416,-10.3613,-10.4743,-10.0127,-10.5816,-10.0967,-10.6488,-9.8754,-10.2979,-16.0097,-16.1104,-16.0296,-15.5879,-13.0088,-14.1055,-12.9594,-13.6602,-16.0912,-14.6791,-16.1480,-15.1186,-16.1558,-14.8952,-16.1785,-14.2170,-16.0714,-15.7436,-16.1246,-15.8280,-13.0087,-13.6755,-12.9792,-13.5265,-16.0690,-14.9625,-16.0813,-15.0429,-16.1807,-14.6742,-15.9765,-14.2347,-15.9639,-15.3027,-16.1409,-15.3663,-13.0207,-14.0520,-13.0226,-13.8289,-16.1247,-15.3342,-16.1407,-15.4146,-16.1332,-14.3432,-16.1210,-15.0689
Mean: -12.7843


Summary:   

| Model                       | Score   |
|-----------------------------|---------|
| KNeighborsRegressor         | 17.7025 |
| GradientBoostingRegressor   | 10.1378 |
| **RandomForestRegressor**   | 8.9758  |
| ElasticNet                  | 14.6441 |
| SVR                         | 15.6008 |
| MLPRegressor                | 12.7843 |

### **Previous cases as iterative predictions**

### **Dedicated library**