# Comparación red neuronal regresión

## Importar librerías

In [22]:
import os
from sklearn.datasets import fetch_california_housing
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras import regularizers
from sklearn.datasets import load_breast_cancer
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import keras_tuner as kt
import numpy as np
import tensorflow as tf
import random as random
import pandas as pd

In [None]:
# Comprobar GPU y configurar memoria
print(tf.__version__)

gpus = tf.config.list_physical_devices("GPU")
print("GPUs:", gpus)
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU habilitada")
    except RuntimeError as e:
        print("Error al configurar GPU:", e)
else:
    print("No se detectaron GPUs")

## Importar datos

In [None]:
california_housing = fetch_california_housing(as_frame=True)

df = california_housing.frame

## Análisis explotario de datos

In [None]:
df.head()

In [None]:
df.describe()

In [None]:
# Ver distribución de las variables
import matplotlib.pyplot as plt

df.hist(bins=30, figsize=(15, 10))
plt.tight_layout()
plt.show()

In [None]:
# Definir semilla para reproducibilidad
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    
set_seed(42)

In [None]:
# Configurar ruta de salida de modelos en el directorio de trabajo 
output_dir = os.path.join(os.getcwd(), 'models/')

In [None]:
# Escalar los datos (features y variable objetivo)
from sklearn.preprocessing import StandardScaler
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_scaled = scaler_X.fit_transform(df.drop(columns=['MedHouseVal']))
y = df['MedHouseVal'].values.reshape(-1, 1)
y_scaled = scaler_y.fit_transform(y).ravel()

In [None]:
# Dividir los datos en conjunto de entrenamiento y prueba (usando los datos escalados)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train_scaled, y_test_scaled = train_test_split(X_scaled, y_scaled, test_size=0.2)

## Optimizador Bayesiano

In [12]:
# Función para crear el modelo
def build_model(hp):
    model = Sequential()
    model.add(Input(shape=(X_train.shape[1],)))
    
    num_layers = hp.Int('num_layers', min_value=1, max_value=5, step=1)
    l2_factor = hp.Choice('l2_factor', values=[0.0, 1e-5, 1e-4, 1e-3])
    dropout_rate = hp.Choice('dropout_rate', values=[0.0, 0.1, 0.2, 0.3])
    
    for i in range(num_layers):
        units = hp.Int(f'units_{i}', min_value=16, max_value=256, step=16)
        activation = hp.Choice(f'activation_{i}', values=['relu', 'tanh', 'sigmoid'])
        model.add(
            Dense(
                units=units,
                activation=activation,
                kernel_regularizer=regularizers.l2(l2_factor)
            )
        )
        if dropout_rate > 0:
            model.add(Dropout(dropout_rate))
            
    model.add(Dense(1, activation='sigmoid'))  # Capa de salida para regresión
        
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
    return model

In [13]:
# Configurar primer optimizador de hiperparámetros

turner = kt.BayesianOptimization(
    build_model,
    objective='val_mae',
    max_trials=20,
    num_initial_points=10,
    executions_per_trial=1,
    directory=output_dir,
    project_name='california_housing_regression',
    seed=42
)

es_tuner = EarlyStopping(monitor='val_mae', patience=10, restore_best_weights=True)

turner.search(
    X_train,
    y_train_scaled,
    epochs=50,
    validation_data=(X_test, y_test_scaled),
    callbacks=[es_tuner],
)

best_hps = turner.get_best_hyperparameters(num_trials=1)[0]
print(f"Mejores hiperparámetros: {best_hps.values}")

Reloading Tuner from /home/jovyan/work/models/california_housing_regression/tuner0.json
Mejores hiperparámetros: {'num_layers': 3, 'units_0': 112, 'activation_0': 'tanh', 'units_1': 80, 'activation_1': 'sigmoid', 'units_2': 64, 'activation_2': 'relu', 'units_3': 16, 'activation_3': 'sigmoid', 'units_4': 80, 'activation_4': 'sigmoid', 'l2_factor': 0.0, 'dropout_rate': 0.0, 'learning_rate': 0.01, 'optimizer': 'rmsprop'}


In [14]:
# Modelo final con los mejores hiperparámetros
model = turner.hypermodel.build(best_hps)

cp = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_mae', mode='min')
es = EarlyStopping(monitor='val_mae', patience=15, restore_best_weights=True)

model.fit(
    X_train,
    y_train_scaled,
    epochs=200,
    validation_data=(X_test, y_test_scaled),
    callbacks=[cp, es],
    verbose=0
)

test_metrics = model.evaluate(X_test, y_test_scaled, verbose=0)
print(f"MAE en el conjunto de prueba (escalado): {test_metrics[1]}")

y_pred_scaled = model.predict(X_test, verbose=0).ravel()

MAE en el conjunto de prueba (escalado): 0.6333106756210327


In [None]:
# Desescalar predicciones y valores reales para calcular el MAE en la escala original
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
y_test_real = scaler_y.inverse_transform(y_test_scaled.reshape(-1, 1)).ravel()

mae = mean_absolute_error(y_test_real, y_pred)
print(f"MAE en validación: {mae}")

In [None]:
# Comparar valores originales vs predichos (desescalados)

# Desescalar ambos arrays
y_test_real = scaler_y.inverse_transform(y_test_scaled.reshape(-1, 1)).ravel()
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()

# Crear un DataFrame para comparar
comparacion = pd.DataFrame({
    'Valor real': y_test_real,
    'Predicción': y_pred
})

comparacion.head(10)

In [None]:
# Visualizar errores de predicción (valor real vs predicho)
import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))
plt.scatter(y_test_real, y_pred, alpha=0.5)
plt.plot([y_test_real.min(), y_test_real.max()], [y_test_real.min(), y_test_real.max()], 'r--')
plt.xlabel('Valor real')
plt.ylabel('Predicción')
plt.title('Comparación valor real vs predicción')
plt.show()

## Random Search

In [15]:
def build_model(hp):
    model = Sequential()

    # Definir número de capas
    for i in range(hp.Int('num_layers', 2, 5)):  # Probar entre 2 y 5 capas
        model.add(Dense(units=hp.Int(f'units_{i}', min_value=16, max_value=64, step=16),
                        activation=hp.Choice('activation', values=['relu', 'tanh', 'sigmoid'])))

    model.add(Dense(1, activation='sigmoid'))  # Capa de salida

    # Definir el optimizador con el learning rate como hiperparámetro
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
                  loss='mse',
                  metrics=['mae'])
    return model

In [18]:
# Crear el tuner
tuner = kt.RandomSearch(
    build_model,  # La función que construye el modelo
    objective='val_mae',  # Objetivo a optimizar
    max_trials=5,  # Número máximo de pruebas
    executions_per_trial=1,  # Número de ejecuciones por prueba
    directory=output_dir,  # Directorio donde guardar los resultados
    project_name='california_housing_random_search',  # Nombre del proyecto
    seed=5
)

# Ejecutar la búsqueda
tuner.search(X_train, y_train_scaled, epochs=30, validation_data=(X_test, y_test_scaled))

# Obtener los mejores hiperparámetros
best_hps = tuner.get_best_hyperparameters()[0]
print(f"Mejores hiperparámetros: {best_hps.values}")

Trial 5 Complete [00h 00m 15s]
val_mae: 0.6413864493370056

Best val_mae So Far: 0.6380404829978943
Total elapsed time: 00h 01m 24s
Mejores hiperparámetros: {'num_layers': 4, 'units_0': 64, 'activation': 'relu', 'units_1': 16, 'learning_rate': 0.0008433633337952192, 'units_2': 16, 'units_3': 16}


In [20]:
# Crear el modelo final con los mejores hiperparámetros
model = tuner.hypermodel.build(best_hps)

cp = ModelCheckpoint(filepath="best_ckpt.r.weights.h5", monitor='val_mae', save_best_only=True, save_weights_only=True)
es = EarlyStopping(monitor='val_mae', patience=5, restore_best_weights=True)

model.fit(X_train, y_train_scaled,
          epochs=30,
          validation_data=(X_test, y_test_scaled),
          callbacks=[cp, es],
          verbose=0)



# Evaluar el modelo en el conjunto de prueba
test_accuracy = model.evaluate(X_test, y_test_scaled)
print(f"Precisión en el conjunto de prueba: {test_accuracy[1]}")

[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 589us/step - loss: 0.5925 - mae: 0.6325
Precisión en el conjunto de prueba: 0.6343444585800171


In [24]:
# Comparar valores originales vs predichos (desescalados)

# Desescalar ambos arrays
y_test_real = scaler_y.inverse_transform(y_test_scaled.reshape(-1, 1)).ravel()
y_pred_scaled = model.predict(X_test, verbose=0).ravel()
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()

comparacion = pd.DataFrame({
    'Valor real': y_test_real,
    'Predicción': y_pred
})

comparacion.head(10)

Unnamed: 0,Valor real,Predicción
0,0.477,2.068558
1,0.458,2.070089
2,5.00001,3.22226
3,2.186,2.707232
4,2.78,2.844649
5,1.587,2.069533
6,1.982,2.300183
7,1.575,2.068673
8,3.4,2.596388
9,4.466,3.222483
