In [1]:
import os

import mlflow
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, cross_validate
from sklearn.neural_network import MLPRegressor

import preprocesamiento_datos

In [2]:
# MLflow configuration.
# Single source of truth for the tracking endpoint. An explicit
# mlflow.set_tracking_uri() call takes precedence over the
# MLFLOW_TRACKING_URI environment variable, so both must carry the same value;
# otherwise the environment setting is silently ignored by this kernel.
MLFLOW_TRACKING_URI = "http://127.0.0.1:5000"

# Exported as well so that any process spawned from this kernel resolves the
# same tracking server.
os.environ['MLFLOW_TRACKING_URI'] = MLFLOW_TRACKING_URI

mlflow.set_tracking_uri(uri=MLFLOW_TRACKING_URI)
# Kept as the last expression so the cell displays the Experiment object.
mlflow.set_experiment("redes_neuronales")

<Experiment: artifact_location='mlflow-artifacts:/2', creation_time=1713175269356, experiment_id='2', last_update_time=1713175269356, lifecycle_stage='active', name='redes_neuronales', tags={'mlflow.sharedViewState.35fcf56d61b8dbb98512f6829913993e41257533dd332e1e8ff07999f73a21ea': '{"searchFilter":"","orderByKey":"attributes.start_time","orderByAsc":false,"startTime":"ALL","lifecycleFilter":"Active","datasetsFilter":[],"modelVersionFilter":"All '
                                                                                            'Runs","selectedColumns":["attributes.`Source`","attributes.`Models`","attributes.`Dataset`"],"runsExpanded":{},"runsPinned":[],"runsHidden":[],"runsHiddenMode":"FIRST_10_RUNS","compareRunCharts":[{"uuid":"1713177259273xowwzda7","type":"BAR","runsCountToCompare":10,"metricSectionId":"171317725927396vp3vfh","deleted":false,"isGenerated":true,"metricKey":"best_mean_test_accuracy"},{"uuid":"1713177259273vnosmd9g","type":"BAR","runsCountToCompare":10,"metri

# Sin Hiperparámetros

In [5]:
# Baseline: MLPRegressor with library-default hyperparameters, evaluated with
# MAE on the validation and test splits and logged to MLflow.

# Lists intended for per-fold train/test MAE values. Nothing in this cell
# fills them; they are kept in case later cells reference them.
train_maes = []
test_maes = []

# Start the MLflow run
with mlflow.start_run(run_name="modelo_completo_sinparam"):
    # Load the data splits and the shared seed.
    # NOTE(review): the original comment labels this step "feature scaling";
    # presumably the first argument toggles it -- confirm against
    # preprocesamiento_datos.preprocesamiento.
    X_train, X_val, X_test, y_train, y_val, y_test, RANDOM_STATE = preprocesamiento_datos.preprocesamiento(True,[], False)

    # Record the seed, matching what the tuned runs log.
    mlflow.log_param("random_state", RANDOM_STATE)

    # No hyperparameters are tuned, but the seed is fixed: MLP weight
    # initialisation and batch shuffling are random, so without it every
    # re-run would log a different baseline.
    model = MLPRegressor(random_state=RANDOM_STATE)

    # Train on the training split
    model.fit(X_train, y_train)

    # Predict on the validation and test splits
    y_val_pred = model.predict(X_val)
    y_test_pred = model.predict(X_test)

    # Mean absolute error (MAE) on both splits
    val_mae = mean_absolute_error(y_val, y_val_pred)
    test_mae = mean_absolute_error(y_test, y_test_pred)

    # Store the validation and test results in MLflow
    mlflow.log_metric("val_mae", val_mae)
    mlflow.log_metric("test_mae", test_mae)


Archivo Datos_la_liga_preparados_entrenamiento.parquet guardado en: ../Downloads\Datos_la_liga_preparados_entrenamiento.parquet




# Con Hiperparámetros

In [4]:
# Hyperparameter tuning of MLPRegressor with an exhaustive grid search and a
# randomized search; each search is tracked as its own MLflow run.

# Load the data splits and the shared seed.
X_train, X_val, X_test, y_train, y_val, y_test, RANDOM_STATE = preprocesamiento_datos.preprocesamiento(False,[], False)

# Exhaustive grid: 3 * 3 * 3 * 3 = 81 candidates.
param_grid = {
    'hidden_layer_sizes': [(24,), (100,), (50,)],
    'alpha': [0.0001, 0.001, 0.01],
    'batch_size': ['auto', 32, 64],
    'learning_rate_init': [0.001, 0.01, 0.1]
}

# Space sampled by the randomized search (n_iter candidates are drawn).
param_rand = {
    'hidden_layer_sizes': [(x,) for x in range(5, 201, 5)],
    'solver': ['lbfgs', 'sgd', 'adam'],
    'alpha': [10**exp for exp in range(-5, 2)],
    'batch_size': [16, 32, 64, 128],
    'learning_rate_init': [10**exp for exp in range(-4, 0)]
}

mlp = MLPRegressor(random_state=RANDOM_STATE)

grid_search = GridSearchCV(estimator=mlp, param_grid=param_grid, scoring='neg_mean_absolute_error', cv=10, n_jobs=-1)

# NOTE(review): the saved output of this cell reports 340 of 1000 fits failing
# (1000 = 100 candidates x 10 folds, i.e. this randomized search). The
# traceback is truncated, so the cause is not visible here; rerun with
# error_score='raise' to surface it.
random_search = RandomizedSearchCV(estimator=mlp, param_distributions=param_rand, n_iter=100, scoring='neg_mean_absolute_error', cv=10, random_state=RANDOM_STATE, n_jobs=-1)


def run_search_experiment(search, run_name, tag, splits, random_state):
    """Fit a CV search inside an MLflow run and log its best model.

    Args:
        search: Unfitted GridSearchCV / RandomizedSearchCV instance.
        run_name: Name given to the MLflow run.
        tag: Short label ("grid", "random") used to build the metric names
            "<tag>_search_validation_error" / "<tag>_search_test_error" and
            the artifact name "best_model_<tag>".
        splits: Tuple (X_train, y_train, X_val, y_val, X_test, y_test).
        random_state: Seed value logged as a run parameter.

    Returns:
        Tuple (best_params, best_model, y_pred_val, y_pred_test,
        error_val, error_test).
    """
    features_train, target_train, features_val, target_val, features_test, target_test = splits

    with mlflow.start_run(run_name=run_name):
        mlflow.log_param("random_state", random_state)

        search.fit(features_train, target_train)
        best_params = search.best_params_

        # With the default refit=True the search has already retrained the
        # best candidate on the whole training split, starting from a clone of
        # the seeded base estimator. Building and fitting a second identical
        # MLPRegressor would only repeat that work.
        best_model = search.best_estimator_

        y_pred_val = best_model.predict(features_val)
        y_pred_test = best_model.predict(features_test)

        error_val = mean_absolute_error(target_val, y_pred_val)
        error_test = mean_absolute_error(target_test, y_pred_test)

        mlflow.log_params(best_params)
        mlflow.log_metric(f"{tag}_search_validation_error", error_val)
        mlflow.log_metric(f"{tag}_search_test_error", error_test)
        mlflow.sklearn.log_model(best_model, f"best_model_{tag}")

    return best_params, best_model, y_pred_val, y_pred_test, error_val, error_test


data_splits = (X_train, y_train, X_val, y_val, X_test, y_test)

# The notebook-level result names are kept so later cells can still use them.
(best_params_grid, best_model_grid,
 y_pred_val_grid, y_pred_test_grid,
 error_val_grid, error_test_grid) = run_search_experiment(
    grid_search, "MLP_Grid_Search", "grid", data_splits, RANDOM_STATE)

(best_params_random, best_model_random,
 y_pred_val_random, y_pred_test_random,
 error_val_random, error_test_random) = run_search_experiment(
    random_search, "MLP_Random_Search", "random", data_splits, RANDOM_STATE)


Archivo Datos_la_liga_preparados_entrenamiento.parquet guardado en: ../Downloads\Datos_la_liga_preparados_entrenamiento.parquet


340 fits failed out of a total of 1000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
340 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Usuario\Documents\Alvaro\Carrera\2-Segundo\Primer cuatrimetre\Fundamentos de la Inteligencia Artificial\PycharmProjects\proyecto\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Usuario\Documents\Alvaro\Carrera\2-Segundo\Primer cuatrimetre\Fundamentos de la Inteligencia Artificial\PycharmProjects\proyecto\Lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^