In [4]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, cross_validate
from sklearn.neural_network import MLPRegressor
import mlflow
import os
from sklearn.metrics import mean_absolute_error

In [5]:
# Establecer la URI de la base de datos SQLite
os.environ['MLFLOW_TRACKING_URI'] = 'sqlite:///mlruns.db'

# Configuración de MLflow
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
mlflow.set_experiment("redes_neuronales")

<Experiment: artifact_location='mlflow-artifacts:/2', creation_time=1713175269356, experiment_id='2', last_update_time=1713175269356, lifecycle_stage='active', name='redes_neuronales', tags={'mlflow.sharedViewState.35fcf56d61b8dbb98512f6829913993e41257533dd332e1e8ff07999f73a21ea': '{"searchFilter":"","orderByKey":"attributes.start_time","orderByAsc":false,"startTime":"ALL","lifecycleFilter":"Active","datasetsFilter":[],"modelVersionFilter":"All '
                                                                                            'Runs","selectedColumns":["attributes.`Source`","attributes.`Models`","attributes.`Dataset`"],"runsExpanded":{},"runsPinned":[],"runsHidden":[],"runsHiddenMode":"FIRST_10_RUNS","compareRunCharts":[{"uuid":"1713177259273xowwzda7","type":"BAR","runsCountToCompare":10,"metricSectionId":"171317725927396vp3vfh","deleted":false,"isGenerated":true,"metricKey":"best_mean_test_accuracy"},{"uuid":"1713177259273vnosmd9g","type":"BAR","runsCountToCompare":10,"metri

# Sin Hiperparámetros

In [5]:
# Definir listas para almacenar los resultados de entrenamiento y prueba para cada validación cruzada
train_maes = []
test_maes = []

# Iniciar run de MLflow
with mlflow.start_run(run_name="modelo_completo_sinparam"):
    # Escalado de características
    import preprocesamiento_datos
    X_train, X_val, X_test, y_train, y_val, y_test, RANDOM_STATE = preprocesamiento_datos.preprocesamiento(True,[], False)

    # Sin definición de hiperparámetros
    model = MLPRegressor()

    # Entrenar el modelo con X_train y y_train
    model.fit(X_train, y_train)

    # Realizar predicciones en el conjunto de validación y de prueba
    y_val_pred = model.predict(X_val)
    y_test_pred = model.predict(X_test)

    # Calcular el error absoluto medio (MAE) para evaluar el modelo en ambos conjuntos
    val_mae = mean_absolute_error(y_val, y_val_pred)
    test_mae = mean_absolute_error(y_test, y_test_pred)

    # Guardar los resultados de validación y prueba en MLflow
    mlflow.log_metric("val_mae", val_mae)
    mlflow.log_metric("test_mae", test_mae)


Archivo Datos_la_liga_preparados_entrenamiento.parquet guardado en: ../Downloads\Datos_la_liga_preparados_entrenamiento.parquet




# Con Hiperparámetros

In [8]:
import itertools
import preprocesamiento_datos
X_train, X_val, X_test, y_train, y_val, y_test, RANDOM_STATE = preprocesamiento_datos.preprocesamiento(True,[], False)
# Define una lista de rangos para el número de neuronas en cada capa
neuronas_por_capa = list(range(0, 190, 10))  # Hasta 201 inclusive

# Define una lista de números de capas
num_capas = list(range(1, 4))  # Desde una capa hasta tres capas

# Inicializa una lista vacía para almacenar las combinaciones
combinaciones = []

# Genera combinaciones para 1 capa
for neurons in neuronas_por_capa:
    combinaciones.append((neurons,))

# Genera combinaciones para 2 capas
if len(num_capas) >= 2:
    for neurons1 in neuronas_por_capa:
        for neurons2 in neuronas_por_capa:
            combinaciones.append((neurons1, neurons2))

# Genera combinaciones para 3 capas
if len(num_capas) >= 3:
    for neurons1 in neuronas_por_capa:
        for neurons2 in neuronas_por_capa:
            for neurons3 in neuronas_por_capa:
                combinaciones.append((neurons1, neurons2, neurons3))

# Ahora, 'combinaciones' contiene todas las combinaciones de capas ocultas
combinaciones

[(0,),
 (10,),
 (20,),
 (30,),
 (40,),
 (50,),
 (60,),
 (70,),
 (80,),
 (90,),
 (100,),
 (110,),
 (120,),
 (130,),
 (140,),
 (150,),
 (160,),
 (170,),
 (180,),
 (0, 0),
 (0, 10),
 (0, 20),
 (0, 30),
 (0, 40),
 (0, 50),
 (0, 60),
 (0, 70),
 (0, 80),
 (0, 90),
 (0, 100),
 (0, 110),
 (0, 120),
 (0, 130),
 (0, 140),
 (0, 150),
 (0, 160),
 (0, 170),
 (0, 180),
 (10, 0),
 (10, 10),
 (10, 20),
 (10, 30),
 (10, 40),
 (10, 50),
 (10, 60),
 (10, 70),
 (10, 80),
 (10, 90),
 (10, 100),
 (10, 110),
 (10, 120),
 (10, 130),
 (10, 140),
 (10, 150),
 (10, 160),
 (10, 170),
 (10, 180),
 (20, 0),
 (20, 10),
 (20, 20),
 (20, 30),
 (20, 40),
 (20, 50),
 (20, 60),
 (20, 70),
 (20, 80),
 (20, 90),
 (20, 100),
 (20, 110),
 (20, 120),
 (20, 130),
 (20, 140),
 (20, 150),
 (20, 160),
 (20, 170),
 (20, 180),
 (30, 0),
 (30, 10),
 (30, 20),
 (30, 30),
 (30, 40),
 (30, 50),
 (30, 60),
 (30, 70),
 (30, 80),
 (30, 90),
 (30, 100),
 (30, 110),
 (30, 120),
 (30, 130),
 (30, 140),
 (30, 150),
 (30, 160),
 (30, 170),
 (3

In [9]:
# Crea el parámetro param_grid con todas las combinaciones generadas
# Crea el parámetro param_grid con todas las combinaciones generadas
param_random = {
    'hidden_layer_sizes': combinaciones, 
    'solver': ['lbfgs', 'sgd', 'adam'],
    'alpha': [10**exp for exp in range(-5, 2)],
    'batch_size': [16, 32, 64, 128],
    'learning_rate_init': [10**exp for exp in range(-4, 0)]
}

mlp = MLPRegressor(random_state=RANDOM_STATE)

random_search = RandomizedSearchCV(estimator=mlp, param_distributions=param_random, scoring='neg_mean_absolute_error', cv=10, n_jobs=-1, n_iter = 100)


with mlflow.start_run(run_name="MLP_Random_Search"):
    mlflow.log_param("random_state", RANDOM_STATE)

    random_search.fit(X_train, y_train)
    best_params_grid = random_search.best_params_

    best_model_grid = MLPRegressor(random_state=RANDOM_STATE, **best_params_grid)
    best_model_grid.fit(X_train, y_train)

    y_pred_val_grid = best_model_grid.predict(X_val)
    y_pred_test_grid = best_model_grid.predict(X_test)

    error_val_grid = mean_absolute_error(y_val, y_pred_val_grid)
    error_test_grid = mean_absolute_error(y_test, y_pred_test_grid)

    mlflow.log_params(best_params_grid)
    mlflow.log_metric("random_search_validation_error", error_val_grid - 400000)
    mlflow.log_metric("random_search_test_error", error_test_grid - 400000)
    mlflow.sklearn.log_model(best_model_grid, "best_model_random")

423 fits failed out of a total of 1000.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
263 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Usuario\Documents\Alvaro\Carrera\2-Segundo\Primer cuatrimetre\Fundamentos de la Inteligencia Artificial\PycharmProjects\proyecto\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Usuario\Documents\Alvaro\Carrera\2-Segundo\Primer cuatrimetre\Fundamentos de la Inteligencia Artificial\PycharmProjects\proyecto\Lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^