# Tarea 5

In [1]:
# Configuración para GPU

import os
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix

import optuna
import mlflow
import dagshub

np.random.seed(42)
tf.random.set_seed(42)

2025-11-26 23:48:48.534469: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Verificar y configurar GPU

gpus = tf.config.list_physical_devices('GPU')
print(f"GPUs disponibles: {len(gpus)}")

if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"GPU configurada: {gpus[0].name}")
    except RuntimeError as e:
        print(f"Error al configurar GPU: {e}")

GPUs disponibles: 1
GPU configurada: /physical_device:GPU:0


In [4]:
# Dagshub
dagshub.init(repo_owner='404brainnotfound-ai', repo_name='Tarea_5', mlflow=True)
print(f"URI de seguimiento: {mlflow.get_tracking_uri()}")

URI de seguimiento: https://dagshub.com/404brainnotfound-ai/Tarea_5.mlflow


In [5]:
# Dataset
df = pd.read_csv('global_house_purchase_dataset.csv')
df = df.drop('property_id', axis=1)

print(f"Forma: {df.shape}")
print(f"Clases: {df['decision'].value_counts().to_dict()}")

Forma: (200000, 24)
Clases: {0: 153932, 1: 46068}


In [6]:
# Codificación one hot para variables
columnas_categoricas = ['country', 'city', 'property_type', 'furnishing_status']
df_codificado = pd.get_dummies(df, columns=columnas_categoricas, dtype=int)

print(f"Características después de codificar: {df_codificado.shape[1] - 1}")

Características después de codificar: 81


In [7]:
# Separar X, y
X = df_codificado.drop('decision', axis=1).values
y = df_codificado['decision'].values

# Normalizar
escalador = MinMaxScaler()
X_escalado = escalador.fit_transform(X)

print(f"Forma de X: {X_escalado.shape}")
print(f"Forma de y: {y.shape}")

Forma de X: (200000, 81)
Forma de y: (200000,)


In [9]:
# División 70/20/10
X_entreno, X_temp, y_entreno, y_temp = train_test_split(X_escalado, y, test_size=0.30, random_state=42, stratify=y)
X_prueba, X_validacion, y_prueba, y_validacion = train_test_split(X_temp, y_temp, test_size=1/3, random_state=42, stratify=y_temp)

print(f"Entreno: {len(X_entreno)} ({len(X_entreno)/len(X)*100:.1f}%)")
print(f"Prueba:  {len(X_prueba)} ({len(X_prueba)/len(X)*100:.1f}%)")
print(f"Validación: {len(X_validacion)} ({len(X_validacion)/len(X)*100:.1f}%)")

num_caracteristicas = X_entreno.shape[1]
print(f"\nNúmero de características: {num_caracteristicas}")

Entreno: 140000 (70.0%)
Prueba:  40000 (20.0%)
Validación: 20000 (10.0%)

Número de características: 81


In [10]:
def crear_modelo(trial):
    
    # Hiperparámetros
    n_capas = trial.suggest_int("n_capas", 2, 4)
    unidades_1 = trial.suggest_int("unidades_1", 128, 512, step=128)
    unidades_2 = trial.suggest_int("unidades_2", 64, 256, step=64)
    dropout = trial.suggest_float("dropout", 0.2, 0.5)
    valor_l2 = trial.suggest_float("valor_l2", 1e-6, 1e-3, log=True)
    tasa_aprendizaje = trial.suggest_float("tasa_aprendizaje", 1e-5, 1e-3, log=True)
    
    # Modelo Dense
    modelo = keras.Sequential()
    
    # Primera capa
    modelo.add(layers.Dense(unidades_1, activation='relu', 
                           kernel_regularizer=regularizers.l2(valor_l2),
                           input_shape=(num_caracteristicas,)))
    modelo.add(layers.BatchNormalization())
    modelo.add(layers.Dropout(dropout))
    
    # Capas intermedias
    for i in range(n_capas - 1):
        modelo.add(layers.Dense(unidades_2, activation='relu',
                               kernel_regularizer=regularizers.l2(valor_l2)))
        modelo.add(layers.BatchNormalization())
        modelo.add(layers.Dropout(dropout))
    
    # Capa de salida
    modelo.add(layers.Dense(1, activation='sigmoid'))
    
    optimizador = keras.optimizers.Adam(learning_rate=tasa_aprendizaje)
    modelo.compile(optimizer=optimizador, loss='binary_crossentropy', metrics=['accuracy'])
    
    return modelo

In [11]:
def objetivo(trial):
    modelo = crear_modelo(trial)
    
    tamano_lote = 128
    
    callbacks = [
        EarlyStopping(monitor='loss', patience=5, restore_best_weights=True, verbose=0)]
    
    # Entrenar solo con entreno
    historial = modelo.fit(
        X_entreno, y_entreno,
        batch_size=tamano_lote,
        epochs=15,
        callbacks=callbacks,
        verbose=0)
    
    # Evaluar en prueba
    perdida, precision = modelo.evaluate(X_prueba, y_prueba, verbose=0)
    
    # Liberar memoria
    del modelo
    keras.backend.clear_session()
    
    return perdida

In [12]:
# Optimización con Optuna
mlflow.set_experiment("Tarea5_Optuna")

estudio = optuna.create_study(direction="minimize", study_name="prediccion_casa")
estudio.optimize(objetivo, n_trials=30, show_progress_bar=True)

print(f"\nMejor intento: {estudio.best_trial.number}")
print(f"Mejor pérdida: {estudio.best_value:.6f}")
print(f"\nMejores parámetros:")
for clave, valor in estudio.best_params.items():
    print(f"  {clave}: {valor}")

[I 2025-11-26 23:58:05,777] A new study created in memory with name: prediccion_casa
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1764223086.240848    3260 gpu_process_state.cc:208] Using CUDA malloc Async allocator for GPU: 0
I0000 00:00:1764223086.491939    3260 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2857 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1050 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1
2025-11-26 23:58:15.009252: I external/local_xla/xla/service/service.cc:163] XLA service 0x706a7c007610 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-11-26 23:58:15.009364: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce GTX 1050 Ti, Compute Capability 6.1
2025-11-26 23:58:15.190623: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_C

[I 2025-11-27 00:01:25,394] Trial 0 finished with value: 0.022582845762372017 and parameters: {'n_capas': 4, 'unidades_1': 128, 'unidades_2': 256, 'dropout': 0.46567924867624183, 'valor_l2': 8.405452643476421e-05, 'tasa_aprendizaje': 0.0007820929266378541}. Best is trial 0 with value: 0.022582845762372017.


Best trial: 1. Best value: 0.00735917:   7%|▋         | 2/30 [05:59<1:22:16, 176.29s/it]

[I 2025-11-27 00:04:05,364] Trial 1 finished with value: 0.007359170354902744 and parameters: {'n_capas': 2, 'unidades_1': 256, 'unidades_2': 64, 'dropout': 0.37949437413517373, 'valor_l2': 5.991447875098912e-06, 'tasa_aprendizaje': 0.0004039905621101411}. Best is trial 1 with value: 0.007359170354902744.


Best trial: 1. Best value: 0.00735917:  10%|█         | 3/30 [08:31<1:14:19, 165.15s/it]

[I 2025-11-27 00:06:37,263] Trial 2 finished with value: 0.20905248820781708 and parameters: {'n_capas': 3, 'unidades_1': 128, 'unidades_2': 64, 'dropout': 0.3755672643500845, 'valor_l2': 1.5773624030825565e-05, 'tasa_aprendizaje': 1.1682198879927657e-05}. Best is trial 1 with value: 0.007359170354902744.


Best trial: 1. Best value: 0.00735917:  13%|█▎        | 4/30 [11:19<1:11:59, 166.13s/it]

[I 2025-11-27 00:09:24,889] Trial 3 finished with value: 0.014039883390069008 and parameters: {'n_capas': 2, 'unidades_1': 512, 'unidades_2': 256, 'dropout': 0.20982606726739844, 'valor_l2': 0.00032247394293704104, 'tasa_aprendizaje': 0.00038059830406529156}. Best is trial 1 with value: 0.007359170354902744.


Best trial: 1. Best value: 0.00735917:  17%|█▋        | 5/30 [14:32<1:13:17, 175.90s/it]

[I 2025-11-27 00:12:38,109] Trial 4 finished with value: 0.1568838655948639 and parameters: {'n_capas': 4, 'unidades_1': 384, 'unidades_2': 256, 'dropout': 0.3526902785882766, 'valor_l2': 0.00019481503681585696, 'tasa_aprendizaje': 5.750708022268836e-05}. Best is trial 1 with value: 0.007359170354902744.


Best trial: 1. Best value: 0.00735917:  20%|██        | 6/30 [49:57<5:35:27, 838.65s/it]

[I 2025-11-27 00:48:03,278] Trial 5 finished with value: 0.011875941418111324 and parameters: {'n_capas': 3, 'unidades_1': 256, 'unidades_2': 256, 'dropout': 0.2706117572917017, 'valor_l2': 8.597236061188818e-06, 'tasa_aprendizaje': 0.0008885280367897803}. Best is trial 1 with value: 0.007359170354902744.


Best trial: 1. Best value: 0.00735917:  23%|██▎       | 7/30 [51:49<3:50:26, 601.16s/it]

[I 2025-11-27 00:49:55,485] Trial 6 finished with value: 0.45661890506744385 and parameters: {'n_capas': 2, 'unidades_1': 384, 'unidades_2': 256, 'dropout': 0.38780308311109635, 'valor_l2': 0.0009153650750738264, 'tasa_aprendizaje': 1.2170729678684407e-05}. Best is trial 1 with value: 0.007359170354902744.


Best trial: 1. Best value: 0.00735917:  27%|██▋       | 8/30 [53:35<2:42:37, 443.50s/it]

[I 2025-11-27 00:51:41,415] Trial 7 finished with value: 0.011771067976951599 and parameters: {'n_capas': 4, 'unidades_1': 256, 'unidades_2': 128, 'dropout': 0.37935455107495997, 'valor_l2': 6.73096064282985e-06, 'tasa_aprendizaje': 0.00020615935953555574}. Best is trial 1 with value: 0.007359170354902744.


Best trial: 1. Best value: 0.00735917:  30%|███       | 9/30 [55:23<1:58:26, 338.43s/it]

[I 2025-11-27 00:53:28,804] Trial 8 finished with value: 0.022145655006170273 and parameters: {'n_capas': 2, 'unidades_1': 256, 'unidades_2': 128, 'dropout': 0.4532535716776607, 'valor_l2': 5.2784689594393735e-05, 'tasa_aprendizaje': 0.00014709674189449139}. Best is trial 1 with value: 0.007359170354902744.


Best trial: 1. Best value: 0.00735917:  33%|███▎      | 10/30 [57:28<1:30:55, 272.77s/it]

[I 2025-11-27 00:55:34,563] Trial 9 finished with value: 0.01723330281674862 and parameters: {'n_capas': 2, 'unidades_1': 512, 'unidades_2': 256, 'dropout': 0.37283253902869434, 'valor_l2': 0.0004987589108437296, 'tasa_aprendizaje': 0.0004349248201463673}. Best is trial 1 with value: 0.007359170354902744.


Best trial: 1. Best value: 0.00735917:  37%|███▋      | 11/30 [59:13<1:10:07, 221.44s/it]

[I 2025-11-27 00:57:19,622] Trial 10 finished with value: 0.11475389450788498 and parameters: {'n_capas': 3, 'unidades_1': 128, 'unidades_2': 64, 'dropout': 0.2821159707690904, 'valor_l2': 1.1489448282415182e-06, 'tasa_aprendizaje': 5.098299537161949e-05}. Best is trial 1 with value: 0.007359170354902744.


Best trial: 1. Best value: 0.00735917:  40%|████      | 12/30 [1:01:14<57:13, 190.77s/it]  

[I 2025-11-27 00:59:20,237] Trial 11 finished with value: 0.010426797904074192 and parameters: {'n_capas': 4, 'unidades_1': 256, 'unidades_2': 128, 'dropout': 0.42496863772859816, 'valor_l2': 3.6794936587928374e-06, 'tasa_aprendizaje': 0.00019051924764168772}. Best is trial 1 with value: 0.007359170354902744.


Best trial: 1. Best value: 0.00735917:  43%|████▎     | 13/30 [1:03:12<47:46, 168.61s/it]

[I 2025-11-27 01:01:17,870] Trial 12 finished with value: 0.009006055071949959 and parameters: {'n_capas': 3, 'unidades_1': 256, 'unidades_2': 128, 'dropout': 0.4992219772044605, 'valor_l2': 2.14396966333637e-06, 'tasa_aprendizaje': 0.000240943987585185}. Best is trial 1 with value: 0.007359170354902744.


Best trial: 13. Best value: 0.00654011:  47%|████▋     | 14/30 [1:05:19<41:41, 156.32s/it]

[I 2025-11-27 01:03:25,768] Trial 13 finished with value: 0.0065401094034314156 and parameters: {'n_capas': 3, 'unidades_1': 384, 'unidades_2': 64, 'dropout': 0.48193138506574673, 'valor_l2': 1.1721494032702744e-06, 'tasa_aprendizaje': 0.00037509832294593576}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  50%|█████     | 15/30 [1:07:11<35:43, 142.89s/it]

[I 2025-11-27 01:05:17,550] Trial 14 finished with value: 0.03878061845898628 and parameters: {'n_capas': 2, 'unidades_1': 384, 'unidades_2': 64, 'dropout': 0.31529764515608416, 'valor_l2': 1.108927722157891e-06, 'tasa_aprendizaje': 7.82012764631472e-05}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  53%|█████▎    | 16/30 [1:09:20<32:20, 138.64s/it]

[I 2025-11-27 01:07:26,317] Trial 15 finished with value: 0.012830793857574463 and parameters: {'n_capas': 3, 'unidades_1': 384, 'unidades_2': 192, 'dropout': 0.4314152903390188, 'valor_l2': 2.0102325451918875e-05, 'tasa_aprendizaje': 0.00045218797757603715}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  57%|█████▋    | 17/30 [1:11:23<28:59, 133.82s/it]

[I 2025-11-27 01:09:28,941] Trial 16 finished with value: 0.15037809312343597 and parameters: {'n_capas': 3, 'unidades_1': 512, 'unidades_2': 64, 'dropout': 0.4870361586092213, 'valor_l2': 3.6450673410956453e-06, 'tasa_aprendizaje': 3.0815009358514955e-05}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  60%|██████    | 18/30 [1:13:05<24:50, 124.23s/it]

[I 2025-11-27 01:11:10,823] Trial 17 finished with value: 0.0074141076765954494 and parameters: {'n_capas': 2, 'unidades_1': 384, 'unidades_2': 192, 'dropout': 0.41906824012508637, 'valor_l2': 2.5125413011983373e-06, 'tasa_aprendizaje': 0.0005868747251276321}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  63%|██████▎   | 19/30 [1:14:53<21:55, 119.63s/it]

[I 2025-11-27 01:12:59,733] Trial 18 finished with value: 0.018681224435567856 and parameters: {'n_capas': 2, 'unidades_1': 256, 'unidades_2': 64, 'dropout': 0.3260174980226912, 'valor_l2': 8.72468903259259e-06, 'tasa_aprendizaje': 0.00011696242385064577}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  67%|██████▋   | 20/30 [1:17:09<20:44, 124.48s/it]

[I 2025-11-27 01:15:15,527] Trial 19 finished with value: 0.008388625457882881 and parameters: {'n_capas': 3, 'unidades_1': 384, 'unidades_2': 64, 'dropout': 0.212648057103239, 'valor_l2': 1.0006727929411624e-06, 'tasa_aprendizaje': 0.00035489498163352107}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  70%|███████   | 21/30 [1:19:15<18:43, 124.81s/it]

[I 2025-11-27 01:17:21,121] Trial 20 finished with value: 0.008707699365913868 and parameters: {'n_capas': 3, 'unidades_1': 128, 'unidades_2': 192, 'dropout': 0.4086729097919626, 'valor_l2': 4.933484586660917e-06, 'tasa_aprendizaje': 0.00025723661157732686}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  73%|███████▎  | 22/30 [1:21:20<16:40, 125.02s/it]

[I 2025-11-27 01:19:26,626] Trial 21 finished with value: 0.007556880358606577 and parameters: {'n_capas': 2, 'unidades_1': 384, 'unidades_2': 192, 'dropout': 0.46408883221156966, 'valor_l2': 2.1954932272511653e-06, 'tasa_aprendizaje': 0.000607187254891657}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  77%|███████▋  | 23/30 [1:23:20<14:24, 123.44s/it]

[I 2025-11-27 01:21:26,372] Trial 22 finished with value: 0.008334715850651264 and parameters: {'n_capas': 2, 'unidades_1': 384, 'unidades_2': 192, 'dropout': 0.4229294416585363, 'valor_l2': 2.148110672522961e-06, 'tasa_aprendizaje': 0.0009965124564594112}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  80%|████████  | 24/30 [1:25:22<12:18, 123.13s/it]

[I 2025-11-27 01:23:28,783] Trial 23 finished with value: 0.011197075247764587 and parameters: {'n_capas': 2, 'unidades_1': 512, 'unidades_2': 128, 'dropout': 0.44695743742494326, 'valor_l2': 1.35569725028622e-05, 'tasa_aprendizaje': 0.0005294102573348646}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  83%|████████▎ | 25/30 [1:27:27<10:18, 123.62s/it]

[I 2025-11-27 01:25:33,553] Trial 24 finished with value: 0.0067955506965518 and parameters: {'n_capas': 2, 'unidades_1': 384, 'unidades_2': 192, 'dropout': 0.40192089186324204, 'valor_l2': 2.3642624468602677e-06, 'tasa_aprendizaje': 0.00028841830075890457}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  87%|████████▋ | 26/30 [1:29:37<08:21, 125.41s/it]

[I 2025-11-27 01:27:43,146] Trial 25 finished with value: 0.01335595827549696 and parameters: {'n_capas': 2, 'unidades_1': 256, 'unidades_2': 128, 'dropout': 0.3445394673146297, 'valor_l2': 2.7522320681373984e-05, 'tasa_aprendizaje': 0.0003169757234185196}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  90%|█████████ | 27/30 [1:32:11<06:42, 134.02s/it]

[I 2025-11-27 01:30:17,251] Trial 26 finished with value: 0.007048497907817364 and parameters: {'n_capas': 2, 'unidades_1': 384, 'unidades_2': 64, 'dropout': 0.39745661292449036, 'valor_l2': 1.5592650369446252e-06, 'tasa_aprendizaje': 0.00017539191468063842}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  93%|█████████▎| 28/30 [1:35:04<04:51, 145.59s/it]

[I 2025-11-27 01:33:09,823] Trial 27 finished with value: 0.010596007108688354 and parameters: {'n_capas': 3, 'unidades_1': 512, 'unidades_2': 128, 'dropout': 0.47825675151390334, 'valor_l2': 1.6325719587945135e-06, 'tasa_aprendizaje': 0.00011974965871693592}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011:  97%|█████████▋| 29/30 [1:37:24<02:24, 144.00s/it]

[I 2025-11-27 01:35:30,131] Trial 28 finished with value: 0.009974529035389423 and parameters: {'n_capas': 3, 'unidades_1': 384, 'unidades_2': 192, 'dropout': 0.39985547140091854, 'valor_l2': 3.409723705361805e-06, 'tasa_aprendizaje': 0.00014723438989321553}. Best is trial 13 with value: 0.0065401094034314156.


Best trial: 13. Best value: 0.00654011: 100%|██████████| 30/30 [1:39:33<00:00, 199.12s/it]

[I 2025-11-27 01:37:39,304] Trial 29 finished with value: 0.07912696897983551 and parameters: {'n_capas': 4, 'unidades_1': 384, 'unidades_2': 64, 'dropout': 0.44431475078038163, 'valor_l2': 1.3977784489449164e-06, 'tasa_aprendizaje': 8.375952042574526e-05}. Best is trial 13 with value: 0.0065401094034314156.

Mejor intento: 13
Mejor pérdida: 0.006540

Mejores parámetros:
  n_capas: 3
  unidades_1: 384
  unidades_2: 64
  dropout: 0.48193138506574673
  valor_l2: 1.1721494032702744e-06
  tasa_aprendizaje: 0.00037509832294593576



