## **Predicting Disease Spread: Model for Iquitos**

### **Imports**

In [1]:
import sys
import os

import numpy as np
import pandas as pd

sys.path.append(os.path.abspath(".."))
from src.data_setup import load_data, split_cities
from src.model import (
    ModelTrainer,
    supporting_model
)

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor


### **Data**

In [2]:
features_train, labels_train = load_data(train=True)
sj_features_train, iq_features_train = split_cities(features_train)
sj_labels_train, iq_labels_train = split_cities(labels_train)

In [3]:
features_test, labels_test = load_data(train=False)
sj_features_test, iq_features_test = split_cities(features_test)

#### Drop only non-numeric, irrelevant column

In [4]:
iq_features_train_num = iq_features_train.drop(columns=['week_start_date'])
iq_features_test_num = iq_features_test.drop(columns=['week_start_date'])
iq_features_test_added_prev_3 = pd.read_csv("../data/processed/iq_features_test_added_prev_3.csv")

### **Prev cases as predictions of another model**

#### Best supporting model choice 

In [5]:
knn = KNeighborsRegressor()
rf = RandomForestRegressor()
en = ElasticNet()
svr = SVR()
boosting = GradientBoostingRegressor()
mlp = MLPRegressor()

In [6]:
# Parameter grid for KNeighborsRegressor
PARAM_GRID_KNN = {
    'n_neighbors': [3, 5, 7, 9, 13, 17],
    'weights': ['uniform', 'distance'],
    'p': [1, 2]
}

# Parameter grid for RandomForestRegressor
PARAM_GRID_RF = {
    'n_estimators': [500],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Parameter grid for GradientBoostingRegressor
PARAM_GRID_BOOSTING = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Parameter grid for ElasticNet
PARAM_GRID_EN = {
    'alpha': [0.01, 0.1, 1.0, 10.0, 100.0],
    'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99]
}

# Parameter grid for SVR
PARAM_GRID_SVR = {
    'C': [0.1, 1, 10, 100],
    'epsilon': [0.01, 0.1, 0.2, 0.5],
    'kernel': ['linear', 'rbf', 'poly']
}

# Parameter grid for MLPRegressor
PARAM_GRID_MLP = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'learning_rate': ['constant', 'adaptive'],
    'alpha': [0.0001, 0.001, 0.01]
}

In [7]:
K_PREV = 3
IMPUTATION_METHOD = "linear"
TOP_N_FEATURES = 10
CORR_METHOD = "pearson"
SCALER = "standard"

In [None]:
import warnings
from sklearn.exceptions import ConvergenceWarning

warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=ConvergenceWarning)

supp = supporting_model(
    iq_features_train_num,
    iq_labels_train,
    iq_features_test_num,
    [knn, rf, en, svr, boosting, mlp],
    [PARAM_GRID_KNN, PARAM_GRID_RF, PARAM_GRID_EN, PARAM_GRID_SVR, PARAM_GRID_BOOSTING, PARAM_GRID_MLP],
)

Summary of the best supporting model choice: 
| Model                     | Score     |
|---------------------------|-----------|
| KNeighborsRegressor       | 6.9542    |
| RandomForestRegressor     | 7.0734    |
| ElasticNet                | 6.6470    |
| SVR                       | 6.1562  |
| GradientBoostingRegressor | 6.7217  |
| **MLPRegressor**              | 6.6006  |

In [9]:
iq_features_test_added_prev = supp["X_test"]

In [10]:
iq_features_test_added_prev.head(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,ndvi_ne,ndvi_nw,ndvi_se,ndvi_sw,precipitation_amt_mm,reanalysis_air_temp_k,reanalysis_avg_temp_k,reanalysis_dew_point_temp_k,reanalysis_max_air_temp_k,reanalysis_min_air_temp_k,...,ndvi_ne_ndvi_nw_sum,ndvi_ne_ndvi_se_diff,ndvi_ne_ndvi_se_prod,ndvi_ne_ndvi_se_quot,ndvi_ne_ndvi_se_sum,ndvi_ne_ndvi_sw_diff,ndvi_ne_ndvi_sw_prod,1_prev_cases,2_prev_cases,3_prev_cases
year,weekofyear,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2010,26,0.183783,0.1425,0.225129,0.150214,82.29,297.648571,299.214286,296.118571,307.9,293.7,...,0.326283,-0.041345,0.041375,0.816348,0.408912,0.033569,0.027607,4.0,1.0,1.0


#### Models

In [9]:
model_trainer_knn = ModelTrainer(model=KNeighborsRegressor(),
                             imputation_method=IMPUTATION_METHOD,
                             top_n_features=TOP_N_FEATURES,
                             corr_method=CORR_METHOD,
                             scaling_method=SCALER,
                             k_prev_targets=K_PREV,
                             param_grid=PARAM_GRID_KNN)

model_trainer_knn.fit(iq_features_train_num, iq_labels_train)
model_trainer_knn.transform(iq_features_train_num, iq_labels_train, iq_features_test_added_prev_3)

--- Model KNeighborsRegressor ---
Preprocessing started.
Preprocessing finished.
Training model.
Tunning model's hyperparameters.




CV results: -5.2750,-5.2903,-5.1635,-5.1857,-5.2177,-5.1968,-5.1204,-5.1116,-5.1852,-5.1637,-5.0692,-5.0589,-5.1436,-5.1231,-4.9577,-4.9471,-5.1033,-5.0824,-5.0223,-4.9988,-5.1486,-5.1231,-5.0687,-5.0406
Mean: -5.1165


In [11]:
model_trainer_boosting = ModelTrainer(model=GradientBoostingRegressor(),
                             imputation_method=IMPUTATION_METHOD,
                             top_n_features=TOP_N_FEATURES,
                             corr_method=CORR_METHOD,
                             scaling_method=SCALER,
                             k_prev_targets=K_PREV,
                             param_grid=PARAM_GRID_BOOSTING)

model_trainer_boosting.fit(iq_features_train_num, iq_labels_train)
model_trainer_boosting.transform(iq_features_train_num, iq_labels_train, iq_features_test_added_prev_3)

--- Model GradientBoostingRegressor ---
Preprocessing started.
Preprocessing finished.
Training model.
Tunning model's hyperparameters.
CV results: -4.5556,-4.6481,-4.6220,-4.6887,-4.7230,-4.5836,-4.5921,-4.5935,-4.6318,-4.5606,-4.5897,-4.5573,-4.5359,-4.5014,-4.3387,-4.5636,-4.6963,-4.6466,-4.7331,-4.5940,-4.7160,-4.1744,-4.4141,-4.8362,-4.5128,-4.5125,-4.1287,-4.3673,-4.2422,-4.6326,-4.6514,-4.5420,-4.1674,-4.7568,-4.5461,-4.5207,-4.3107,-4.4441,-4.4546,-4.6500,-4.6600,-4.4676,-4.5558,-4.1884,-4.3701,-4.4962,-4.4831,-4.2984,-4.4584,-4.4286,-4.5186,-4.5867,-4.5047,-4.2824,-4.5479,-4.5345,-4.3781,-4.6253,-4.5353,-4.5154,-4.5883,-4.4392,-4.5225,-4.6819,-4.4440,-4.4635,-4.7504,-4.5780,-4.7016,-4.4203,-4.4423,-4.5217,-4.3180,-4.2787,-4.3967,-4.4064,-4.6395,-4.1304,-4.4716,-4.3986,-4.5547,-4.5965,-4.4651,-4.5029,-4.3945,-4.5242,-4.5099,-4.6382,-4.2772,-4.3404,-4.6827,-4.4016,-4.4235,-4.4873,-4.5712,-4.3332,-4.8664,-4.8438,-4.3698,-4.2899
Mean: -4.5064


In [12]:
model_trainer_rf = ModelTrainer(model=RandomForestRegressor(),
                             imputation_method=IMPUTATION_METHOD,
                             top_n_features=TOP_N_FEATURES,
                             corr_method=CORR_METHOD,
                             scaling_method=SCALER,
                             k_prev_targets=K_PREV,
                             param_grid=PARAM_GRID_RF)

model_trainer_rf.fit(iq_features_train_num, iq_labels_train)
model_trainer_rf.transform(iq_features_train_num, iq_labels_train, iq_features_test_added_prev_3)

--- Model RandomForestRegressor ---
Preprocessing started.
Preprocessing finished.
Training model.
Tunning model's hyperparameters.




CV results: -4.0925,-4.0915,-4.0710,-4.0435,-4.0559,-4.0273,-3.9772,-3.9600,-3.9624,-4.0970,-4.0959,-4.1130,-4.0398,-4.0816,-4.0258,-3.9869,-3.9630,-3.9318,-4.1038,-4.1121,-4.0983,-4.0309,-4.0527,-4.0221,-3.9519,-3.9516,-3.9521,-4.0813,-4.0911,-4.0697,-4.0592,-4.0457,-4.0349,-3.9724,-3.9653,-3.9673
Mean: -4.0327


In [13]:
model_trainer_en = ModelTrainer(model=ElasticNet(),
                             imputation_method=IMPUTATION_METHOD,
                             top_n_features=TOP_N_FEATURES,
                             corr_method=CORR_METHOD,
                             scaling_method=SCALER,
                             k_prev_targets=K_PREV,
                             param_grid=PARAM_GRID_EN)

model_trainer_en.fit(iq_features_train_num, iq_labels_train)
model_trainer_en.transform(iq_features_train_num, iq_labels_train, iq_features_test_added_prev_3)

--- Model ElasticNet ---
Preprocessing started.
Preprocessing finished.
Training model.
Tunning model's hyperparameters.


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


CV results: -4.3088,-4.3104,-4.3130,-4.3174,-4.3239,-4.3259,-4.3277,-4.1523,-4.1437,-4.1441,-4.1380,-4.1349,-4.1339,-4.1334,-4.4584,-4.3844,-4.3367,-4.2616,-4.1656,-4.1335,-4.1033,-6.0725,-6.2659,-6.4972,-6.7049,-6.8483,-6.8483,-6.8483,-6.8483,-6.8483,-6.8483,-6.8483,-6.8483,-6.8483,-6.8483
Mean: -5.2307


In [14]:
model_trainer_svr = ModelTrainer(model=SVR(),
                             imputation_method=IMPUTATION_METHOD,
                             top_n_features=TOP_N_FEATURES,
                             corr_method=CORR_METHOD,
                             scaling_method=SCALER,
                             k_prev_targets=K_PREV,
                             param_grid=PARAM_GRID_SVR)

model_trainer_svr.fit(iq_features_train_num, iq_labels_train)
model_trainer_svr.transform(iq_features_train_num, iq_labels_train, iq_features_test_added_prev_3)

--- Model SVR ---
Preprocessing started.
Preprocessing finished.
Training model.
Tunning model's hyperparameters.




CV results: -3.8758,-5.9715,-5.9276,-3.8740,-5.9757,-5.9359,-3.8698,-5.9797,-5.9375,-3.8752,-5.9904,-5.9519,-3.8546,-4.8628,-5.4843,-3.8448,-4.8641,-5.4803,-3.8424,-4.8652,-5.4797,-3.8267,-4.8898,-5.4805,-3.9073,-4.2092,-5.6565,-3.9018,-4.2098,-5.6459,-3.8937,-4.2115,-5.6324,-3.8683,-4.2273,-5.5955,-3.9100,-5.0378,-8.3716,-3.9031,-5.0302,-8.3267,-3.8961,-5.0221,-8.2788,-3.8866,-5.0042,-8.1633
Mean: -5.0777


In [15]:
import warnings
from sklearn.exceptions import ConvergenceWarning

warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=ConvergenceWarning)

model_trainer_mlp = ModelTrainer(model=MLPRegressor(),
                             imputation_method=IMPUTATION_METHOD,
                             top_n_features=TOP_N_FEATURES,
                             corr_method=CORR_METHOD,
                             scaling_method=SCALER,
                             k_prev_targets=K_PREV,
                             param_grid=PARAM_GRID_MLP)

model_trainer_mlp.fit(iq_features_train_num, iq_labels_train)
model_trainer_mlp.transform(iq_features_train_num, iq_labels_train, iq_features_test_added_prev_3)

--- Model MLPRegressor ---
Preprocessing started.
Preprocessing finished.
Training model.
Tunning model's hyperparameters.




CV results: -4.5187,-4.8462,-4.5729,-4.9247,-4.5173,-4.7707,-4.3917,-4.8133,-4.8264,-4.8509,-4.6494,-4.9822,-4.7210,-4.8118,-4.8893,-4.9915,-4.5840,-4.9267,-4.5804,-4.8674,-4.4134,-4.7166,-4.4481,-4.7977,-4.7731,-4.7803,-4.5709,-5.2355,-4.7638,-4.8533,-4.7879,-5.0048,-4.5172,-4.8904,-4.4915,-4.8849,-4.4705,-4.8854,-4.5027,-4.9448,-4.7366,-4.9062,-4.8148,-5.0268,-4.7456,-4.8362,-4.6442,-4.8272,-3.9964,-4.6729,-4.0500,-4.7422,-3.9892,-4.5801,-4.0147,-4.6826,-4.2112,-5.4240,-4.2477,-5.6387,-4.3226,-5.2173,-4.3177,-5.3462,-3.9903,-4.6898,-3.9837,-4.6772,-3.9368,-4.6403,-3.9854,-4.6986,-4.3510,-5.6738,-4.3035,-5.7443,-4.2686,-5.2730,-4.2480,-5.2784,-3.9654,-4.7051,-3.9734,-4.8125,-3.9454,-4.6581,-3.9171,-4.7469,-4.3489,-5.7421,-4.2405,-5.5001,-4.3441,-5.2779,-4.3620,-5.3636
Mean: -4.6813


Summary:   

| Model                       | Score   |
|-----------------------------|---------|
| KNeighborsRegressor         | 5.1165 |
| GradientBoostingRegressor   | 4.5064 |
| **RandomForestRegressor**   | 4.0327  |
| ElasticNet                  | 5.2307 |
| SVR                         | 5.0777 |
| MLPRegressor                | 4.6813 |

### **Previous cases as iterative predictions**

### **Dedicated library**