En este notebook vamos a entrenar los modelos que usan perceptrón multicapa. Para ello nos vamos a apoyar en la librería scikit-learn, cuyo estimador MLPRegressor nos permite entrenar modelos de predicción sobre una variable continua con perceptrón multicapa.

Importamos las librerías externas como dependencias de nuestro propio repositorio.

In [1]:
import sys

# Add the directory two levels up to the import path so that the
# repository's own modules can be imported from this notebook.
sys.path.append("../../")

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from sklearn.model_selection import train_test_split, KFold, RandomizedSearchCV, GridSearchCV, cross_validate
from sklearn.neural_network import MLPRegressor as MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from modelos import *
from sklearn.compose import ColumnTransformer
from modelos import RANDOM_SEED
from scipy.stats import randint
import random

In [3]:
# Let pandas display up to 50 columns so the whole frame is visible.
pd.set_option("display.max_columns", 50)

In [4]:
# Fix the random seed to the one used throughout the repository.
RANDOM_STATE = RANDOM_SEED

In [5]:
# Load the train/test split through the repository helper.
X_train, X_test, y_train, y_test = sep_train_test()

# Drop the repeated columns (identified in the analysis notebook) from both
# feature sets, using a single shared list so the two stay in sync.
_columnas_repetidas = ["Temperatura", "wspd", "anio", "mes", "dia", "hora"]
X_train = X_train.drop(columns=_columnas_repetidas)
X_test = X_test.drop(columns=_columnas_repetidas)

In [6]:
# Preview the first rows of the training features.
X_train.head()

Unnamed: 0,moonphase,temp,dewPt,heat_index,rh,pressure,vis,wc,feels_like,uv_index,wdir_E,wdir_ENE,wdir_ESE,wdir_N,wdir_NE,wdir_NNE,wdir_NNW,wdir_NW,wdir_S,wdir_SE,wdir_SSE,wdir_SSW,wdir_SW,wdir_VAR,wdir_W,wdir_WNW,wdir_WSW,Vviento,PeriodoOlas,Lluvia,Nubosidad
0,0.48,48.0,43.0,48.0,82.0,30.46,6.0,42.5,42.5,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,14,8,0.0,7
1,0.48,46.0,43.0,46.0,87.0,30.46,6.0,41.0,41.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,12,8,0.0,69
2,0.48,45.0,43.0,45.0,93.0,30.49,6.0,39.0,39.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,10,9,0.0,100
3,0.48,45.0,42.0,45.0,90.0,30.49,6.0,40.0,40.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,9,9,0.0,89
4,0.48,44.0,41.0,44.0,90.0,30.52,6.0,39.5,39.5,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,9,8,0.0,100


In [7]:
# (rows, columns) of the training features.
X_train.shape

(8396, 31)

**IMPORTANTE**

A lo largo de este notebook vamos a ir guardando los modelos en un mismo experimento llamado `mlperceptron`, y cada modelo distinto se verá reflejado en MLflow como una run con un nombre único.

# **ENTRENAMIENTO DE LOS MODELOS**


SIN ESCALADO DE VARIABLES
-

Primero vamos a entrenar los modelos sin escalado de variables. Más tarde repetiremos el proceso con variables escaladas para ver si hallamos una mejoría.

**MODELO COTA INFERIOR**

Este modelo es un baseline con los parámetros por defecto para ver si ajustando los hiperparámetros mejoramos el modelo.


Usamos validación cruzada (cv) únicamente para obtener las métricas de este modelo, no para seleccionar el mejor ajuste de hiperparámetros (que suele ser el motivo habitual de usar cv).

In [9]:
def modelo_cota_inferior(
    x_tr,
    x_te,
    y_tr,
    y_te,
    run_name,
    exp_name="mlperceptron",
    exp_info="Modelo perceptrón sin modificar hiperparámetros con cv",
):
    """Train the default-hyperparameter MLP baseline and log it to MLflow.

    Cross-validation is used here only to report metrics, not to select a
    model. The estimator that gets logged is fitted on the full training set.

    Args:
        x_tr: Training features.
        x_te: Test features.
        y_tr: Training target.
        y_te: Test target.
        run_name: Name of the MLflow run.
        exp_name: Name of the MLflow experiment the run is stored in.
        exp_info: Description forwarded to ``persist_model_to_mlflow``.
    """
    tracker = MLFlow(exp_name)

    # MLP regressor with default hyperparameters; only the seed is fixed.
    baseline = MLPRegressor(random_state=RANDOM_STATE)

    # Cross-validated train/test scores, using the repository's fold helper.
    cv_scores = cross_validate(
        baseline,
        x_tr,
        y_tr,
        cv=cv_folds(),
        scoring="neg_root_mean_squared_error",
        return_train_score=True,
    )

    # Fit the baseline on all of the training data.
    baseline.fit(x_tr, y_tr)

    # Test-set metrics.
    metricas = calcular_metricas(y_te, baseline.predict(x_te))
    # The scorer returns negated RMSE, so flip the sign of the fold means.
    metricas["CV_TEST_RMSE"] = -cv_scores["test_score"].mean()
    metricas["CV_TRAIN_RMSE"] = -cv_scores["train_score"].mean()

    # Persist model, parameters and metrics to MLflow.
    tracker.persist_model_to_mlflow(
        x_tr,
        baseline,
        {"random_state": RANDOM_STATE},
        metricas,
        run_name,
        exp_info,
    )


In [11]:
# Train and log the unscaled baseline.
modelo_cota_inferior(
    X_train,
    X_test,
    y_train,
    y_test,
    run_name="perceptron-cota-inferior",
)

2024/04/27 20:15:24 INFO mlflow.tracking.fluent: Experiment with name 'mlperceptron' does not exist. Creating a new experiment.
Successfully registered model 'perceptron-cota-inferior'.
2024/04/27 20:16:31 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: perceptron-cota-inferior, version 1
Created version '1' of model 'perceptron-cota-inferior'.


OPTIMIZANDO HIPERPARÁMETROS CON **GRIDSEARCH**

In [12]:
# Hyperparameter grid explored by GridSearchCV.
params = {
    # One, two and three hidden layers of 20, 60 or 100 units each.
    "hidden_layer_sizes": [
        (20,), (60,), (100,),
        (20, 20), (60, 60), (100, 100),
        (20, 20, 20), (60, 60, 60), (100, 100, 100),
    ],
    "activation": ["relu", "logistic"],
    # Mini-batches of 100 samples, half the training set, or the full set.
    "batch_size": [100, X_train.shape[0] // 2, X_train.shape[0]],
    # L2 penalty on a log scale from 1e-4 to 1e-1. Values must be distinct:
    # a repeated value makes GridSearchCV fit the same combination twice.
    "alpha": [0.0001, 0.001, 0.01, 0.1],
}



In [13]:
def modelo_grid_search(x_tr, x_te, y_tr, y_te, run_name, exp_info, exp_name="mlperceptron", param_grid=None):
    """Tune an MLPRegressor with GridSearchCV and log the best model to MLflow.

    Args:
        x_tr: Training features.
        x_te: Test features.
        y_tr: Training target.
        y_te: Test target.
        run_name: Name of the MLflow run.
        exp_info: Description of the run, forwarded to
            ``persist_model_to_mlflow``.
        exp_name: Name of the MLflow experiment the run is stored in.
        param_grid: Grid to explore. Defaults to the module-level ``params``.
    """
    tracker = MLFlow(exp_name)

    # Fall back to the notebook-wide grid when no explicit grid is given.
    if param_grid is None:
        param_grid = params

    # Exhaustive search over the grid, scored by (negated) RMSE on the
    # repository's cross-validation folds.
    busqueda = GridSearchCV(
        MLPRegressor(random_state=RANDOM_STATE),
        param_grid,
        cv=cv_folds(),
        return_train_score=True,
        scoring="neg_root_mean_squared_error",
        n_jobs=-1,
    )
    busqueda.fit(x_tr, y_tr)

    # calcular_metricas_search is the repository helper that builds the
    # metrics for a fitted GridSearchCV / RandomizedSearchCV.
    metricas = calcular_metricas_search(busqueda, x_te, y_te)

    # Log the best estimator with the caller-supplied description.
    tracker.persist_model_to_mlflow(
        x_tr,
        busqueda.best_estimator_,
        busqueda.best_params_,
        metricas,
        run_name,
        exp_info,
    )

In [14]:
# Grid search on the unscaled data.
modelo_grid_search(
    X_train,
    X_test,
    y_train,
    y_test,
    run_name="mlp-grid-search",
    exp_info="Modelo perceptrón con GridSearchCV",
)

Successfully registered model 'mlp-grid-search'.
2024/04/27 20:44:19 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: mlp-grid-search, version 1
Created version '1' of model 'mlp-grid-search'.


In [15]:
# Dedicated generator seeded with the repository seed, so the candidate layer
# widths are identical on every execution of the notebook (the global
# ``random`` module is never seeded here).
_rng_capas = random.Random(RANDOM_STATE)

# Search space sampled by RandomizedSearchCV.
params_random_search = {
    # Three candidate architectures with 1, 2 and 3 hidden layers, each layer
    # with a width drawn from [20, 100].
    "hidden_layer_sizes": [
        tuple(_rng_capas.randint(20, 100) for _ in range(n_capas))
        for n_capas in (1, 2, 3)
    ],
    "alpha": np.arange(0.0001, 1.0, 0.01),
    "batch_size": range(100, X_train.shape[0]),
    "activation": ["relu", "logistic"],
}

In [16]:
def modelo_random_search(x_tr, x_te, y_tr, y_te, run_name, exp_info, exp_name="mlperceptron", param_distributions=None, n_iter=50):
    """Tune an MLPRegressor with RandomizedSearchCV and log the best model.

    Args:
        x_tr: Training features.
        x_te: Test features.
        y_tr: Training target.
        y_te: Test target.
        run_name: Name of the MLflow run.
        exp_info: Description of the run, forwarded to
            ``persist_model_to_mlflow``.
        exp_name: Name of the MLflow experiment the run is stored in.
        param_distributions: Search space to sample from. Defaults to the
            module-level ``params_random_search``.
        n_iter: Number of parameter settings sampled (50 by default).
    """
    tracker = MLFlow(exp_name)

    # Fall back to the notebook-wide search space when none is given.
    if param_distributions is None:
        param_distributions = params_random_search

    # Randomised search scored by (negated) RMSE on the repository's
    # cross-validation folds; the sampling itself is seeded for
    # reproducibility.
    busqueda = RandomizedSearchCV(
        MLPRegressor(random_state=RANDOM_STATE),
        param_distributions=param_distributions,
        cv=cv_folds(),
        return_train_score=True,
        scoring="neg_root_mean_squared_error",
        n_jobs=-1,
        n_iter=n_iter,
        random_state=RANDOM_STATE,
    )
    busqueda.fit(x_tr, y_tr)

    metricas = calcular_metricas_search(busqueda, x_te, y_te)

    # Log the best estimator with the caller-supplied description.
    tracker.persist_model_to_mlflow(
        x_tr,
        busqueda.best_estimator_,
        busqueda.best_params_,
        metricas,
        run_name,
        exp_info,
    )

In [17]:
# Randomised search on the unscaled data.
modelo_random_search(
    X_train,
    X_test,
    y_train,
    y_test,
    run_name="mlp-random-search",
    exp_info="Modelo perceptrón con RandomSearch",
)

Successfully registered model 'mlp-random-search'.
2024/04/27 20:52:04 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: mlp-random-search, version 1
Created version '1' of model 'mlp-random-search'.


ESCALANDO VARIABLES
-

In [18]:
# Display the training features before scaling.
X_train

Unnamed: 0,moonphase,temp,dewPt,heat_index,rh,pressure,vis,wc,feels_like,uv_index,wdir_E,wdir_ENE,wdir_ESE,wdir_N,wdir_NE,wdir_NNE,wdir_NNW,wdir_NW,wdir_S,wdir_SE,wdir_SSE,wdir_SSW,wdir_SW,wdir_VAR,wdir_W,wdir_WNW,wdir_WSW,Vviento,PeriodoOlas,Lluvia,Nubosidad
0,0.48,48.0,43.0,48.0,82.0,30.460,6.0,42.5,42.5,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,14,8,0.0,7
1,0.48,46.0,43.0,46.0,87.0,30.460,6.0,41.0,41.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,12,8,0.0,69
2,0.48,45.0,43.0,45.0,93.0,30.490,6.0,39.0,39.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,10,9,0.0,100
3,0.48,45.0,42.0,45.0,90.0,30.490,6.0,40.0,40.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,9,9,0.0,89
4,0.48,44.0,41.0,44.0,90.0,30.520,6.0,39.5,39.5,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,9,8,0.0,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8391,0.29,49.0,48.0,49.0,97.0,29.170,5.5,47.5,47.5,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,10,11,0.3,100
8392,0.29,48.0,46.0,48.0,93.0,29.185,4.0,44.5,44.5,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,20,11,0.4,100
8393,0.29,46.5,44.5,46.5,93.0,29.200,4.5,44.5,44.5,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,4,11,0.4,100
8394,0.29,45.0,43.0,45.0,93.0,29.230,3.0,40.0,40.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,11,11,0.0,100


In [19]:
# Feature scaler for the scaled-data experiments: standardise every column
# except the wind-direction dummies (prefix "wdir_"), which are passed
# through unchanged.
_columnas_a_escalar = [
    col for col in X_train.columns if not col.startswith("wdir_")
]
scaler_x = ColumnTransformer(
    transformers=[("num", StandardScaler(), _columnas_a_escalar)],
    remainder="passthrough",
)
scaler_x.fit(X_train)

In [20]:
# Apply the scaler (fitted on the training features) to both feature sets.
X_train_escalado, X_test_escalado = (
    scaler_x.transform(datos) for datos in (X_train, X_test)
)

In [18]:
# columnas_numericas = [c for c in X_train.columns if not c.startswith('wdir_')]
# columnas_dir_viento = [c for c in X_train.columns if c.startswith('wdir_')]
# nuevo_orden_columnas = columnas_numericas + columnas_dir_viento

In [21]:
# Target scaling for the scaled-data experiments. StandardScaler works on 2-D
# input, so the target is reshaped to a single column, scaled, and flattened
# back to 1-D for the estimators.
y_train_2d = y_train.values.reshape(-1, 1)
scaler_y = StandardScaler().fit(y_train_2d)  # fitted on the training target only
y_train_2d_scaled = scaler_y.transform(y_train_2d)
y_train_1d_scaled = y_train_2d_scaled.ravel()

# The test target is transformed with the statistics learned from training.
y_test_2d = y_test.values.reshape(-1, 1)
y_test_2d_scaled = scaler_y.transform(y_test_2d)
y_test_1d_scaled = y_test_2d_scaled.ravel()

**MODELO COTA INFERIOR**

In [22]:
def modelo_cota_inferior_escalado(x_tr, x_te, y_tr, y_te, run_name, exp_name="mlperceptron", exp_info="Modelo perceptrón escalado sin modificar hiperparámetros con cv"):
    """Train the default MLP baseline on scaled data and log it to MLflow.

    The model is trained on a standardised target, but every logged metric is
    expressed in the target's original units so that this run can be compared
    with the unscaled runs. Relies on the module-level ``scaler_y`` to undo
    the target scaling.

    Args:
        x_tr: Scaled training features.
        x_te: Scaled test features.
        y_tr: Training target, standardised with ``scaler_y``.
        y_te: Test target in its original (unscaled) units.
        run_name: Name of the MLflow run.
        exp_name: Name of the MLflow experiment the run is stored in.
        exp_info: Description forwarded to ``persist_model_to_mlflow``.
    """
    tracker = MLFlow(exp_name)
    baseline = MLPRegressor(random_state=RANDOM_STATE)

    # Cross-validated scores; these are computed on the standardised target.
    cv_scores = cross_validate(
        baseline,
        x_tr,
        y_tr,
        cv=cv_folds(),
        scoring="neg_root_mean_squared_error",
        return_train_score=True,
    )
    baseline.fit(x_tr, y_tr)

    # Bring the predictions back to original units. inverse_transform needs a
    # 2-D column; the result is flattened so the metrics helper receives a
    # 1-D array, exactly as it does for the unscaled baseline.
    pred_escaladas = baseline.predict(x_te).reshape(-1, 1)
    pred_escala_normal = scaler_y.inverse_transform(pred_escaladas).ravel()

    # Test-set metrics, in original units.
    metricas = calcular_metricas(y_te, pred_escala_normal)

    # Standardisation divides the target by scale_, so an RMSE measured on the
    # standardised target is converted to original units by multiplying by
    # that same factor. The leading minus undoes the negated scorer.
    escala_y = float(scaler_y.scale_[0])
    metricas["CV_TEST_RMSE"] = -escala_y * cv_scores["test_score"].mean()
    metricas["CV_TRAIN_RMSE"] = -escala_y * cv_scores["train_score"].mean()

    tracker.persist_model_to_mlflow(
        x_tr,
        baseline,
        {"random_state": RANDOM_STATE},
        metricas,
        run_name,
        exp_info,
    )


In [23]:
# Scaled baseline: trained on the standardised target, evaluated against the
# unscaled y_test because predictions are mapped back to original units.
modelo_cota_inferior_escalado(
    X_train_escalado,
    X_test_escalado,
    y_train_1d_scaled,
    y_test,
    run_name="perceptron-cota-inferior-escalado",
)

Successfully registered model 'perceptron-cota-inferior-escalado'.
2024/04/27 21:25:04 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: perceptron-cota-inferior-escalado, version 1
Created version '1' of model 'perceptron-cota-inferior-escalado'.


**GRID SEARCH**

In [24]:
def calcular_metricas_search_escalando_y(search, X_test, y_test, scaler_y):
    """Build the metrics of a fitted search whose target was standardised.

    All returned metrics are expressed in the target's original units.

    Args:
        search: Fitted GridSearchCV / RandomizedSearchCV run with
            ``return_train_score=True`` and an RMSE-based (negated) scorer.
        X_test: Scaled test features.
        y_test: Test target in its original (unscaled) units.
        scaler_y: Fitted target scaler exposing ``inverse_transform`` and
            ``scale_`` (e.g. a StandardScaler fitted on one column).

    Returns:
        The metrics object produced by ``calcular_metricas`` with the extra
        keys ``CV_TEST_RMSE`` and ``CV_TRAIN_RMSE``.
    """
    # Predict with the best model and undo the target scaling;
    # inverse_transform needs a 2-D column.
    predicciones = search.best_estimator_.predict(X_test)
    predicciones_2d = scaler_y.inverse_transform(predicciones.reshape(-1, 1))

    # Test-set metrics, in original units.
    metricas = calcular_metricas(y_test, predicciones_2d)

    # Cross-validation scores of the winning configuration. They were computed
    # on the standardised target, so multiply by the scaler's scale factor to
    # express the RMSE in original units; the minus undoes the negated scorer.
    ind = search.best_index_
    escala_y = float(scaler_y.scale_[0])
    metricas["CV_TEST_RMSE"] = -escala_y * search.cv_results_["mean_test_score"][ind]
    metricas["CV_TRAIN_RMSE"] = -escala_y * search.cv_results_["mean_train_score"][ind]
    return metricas

In [25]:
def modelo_grid_search_escalado(x_tr, x_te, y_tr, y_te, run_name, exp_info, exp_name="mlperceptron", param_grid=None):
    """Grid-search an MLPRegressor on scaled data and log the best model.

    Relies on the module-level ``scaler_y`` to map predictions back to the
    target's original units when computing the metrics.

    Args:
        x_tr: Scaled training features.
        x_te: Scaled test features.
        y_tr: Training target, standardised with ``scaler_y``.
        y_te: Test target in its original (unscaled) units.
        run_name: Name of the MLflow run.
        exp_info: Description of the run, forwarded to
            ``persist_model_to_mlflow``.
        exp_name: Name of the MLflow experiment the run is stored in.
        param_grid: Grid to explore. Defaults to the module-level ``params``.
    """
    tracker = MLFlow(exp_name)

    # Fall back to the notebook-wide grid when no explicit grid is given.
    if param_grid is None:
        param_grid = params

    busqueda = GridSearchCV(
        MLPRegressor(random_state=RANDOM_STATE),
        param_grid,
        cv=cv_folds(),
        return_train_score=True,
        scoring="neg_root_mean_squared_error",
        n_jobs=-1,
    )
    busqueda.fit(x_tr, y_tr)

    metricas = calcular_metricas_search_escalando_y(busqueda, x_te, y_te, scaler_y)

    # Log the best estimator with the caller-supplied description.
    tracker.persist_model_to_mlflow(
        x_tr,
        busqueda.best_estimator_,
        busqueda.best_params_,
        metricas,
        run_name,
        exp_info,
    )

In [26]:
# Grid search on the scaled data; y_test stays unscaled because the metrics
# are computed on inverse-transformed predictions.
modelo_grid_search_escalado(
    X_train_escalado,
    X_test_escalado,
    y_train_1d_scaled,
    y_test,
    run_name="esc-mlp-grid-search",
    exp_info="Modelo GridSearchCv escalando las variables",
)

Successfully registered model 'esc-mlp-grid-search'.
2024/04/27 21:59:52 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: esc-mlp-grid-search, version 1
Created version '1' of model 'esc-mlp-grid-search'.


**RANDOM SEARCH**

In [27]:
def modelo_random_search_escalado(x_tr, x_te, y_tr, y_te, run_name, exp_info, exp_name="mlperceptron", param_distributions=None, n_iter=50):
    """Random-search an MLPRegressor on scaled data and log the best model.

    Relies on the module-level ``scaler_y`` to map predictions back to the
    target's original units when computing the metrics.

    Args:
        x_tr: Scaled training features.
        x_te: Scaled test features.
        y_tr: Training target, standardised with ``scaler_y``.
        y_te: Test target in its original (unscaled) units.
        run_name: Name of the MLflow run.
        exp_info: Description of the run, forwarded to
            ``persist_model_to_mlflow``.
        exp_name: Name of the MLflow experiment the run is stored in.
        param_distributions: Search space to sample from. Defaults to the
            module-level ``params_random_search``.
        n_iter: Number of parameter settings sampled (50 by default).
    """
    tracker = MLFlow(exp_name)

    # Fall back to the notebook-wide search space when none is given.
    if param_distributions is None:
        param_distributions = params_random_search

    busqueda = RandomizedSearchCV(
        MLPRegressor(random_state=RANDOM_STATE),
        param_distributions=param_distributions,
        cv=cv_folds(),
        return_train_score=True,
        scoring="neg_root_mean_squared_error",
        n_jobs=-1,
        n_iter=n_iter,
        random_state=RANDOM_STATE,
    )
    busqueda.fit(x_tr, y_tr)

    metricas = calcular_metricas_search_escalando_y(busqueda, x_te, y_te, scaler_y)

    # Log the best estimator with the caller-supplied description.
    tracker.persist_model_to_mlflow(
        x_tr,
        busqueda.best_estimator_,
        busqueda.best_params_,
        metricas,
        run_name,
        exp_info,
    )

In [28]:
# Randomised search on the scaled data; y_test stays unscaled because the
# metrics are computed on inverse-transformed predictions.
modelo_random_search_escalado(
    X_train_escalado,
    X_test_escalado,
    y_train_1d_scaled,
    y_test,
    run_name="esc-mlp-random-search",
    exp_info="Modelo RandomSearchCv escalando las variables",
)

Successfully registered model 'esc-mlp-random-search'.
2024/04/27 22:07:39 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: esc-mlp-random-search, version 1
Created version '1' of model 'esc-mlp-random-search'.


# FIN