# Tarea 5

In [1]:
# GPU-related TensorFlow settings, exported as environment variables.
# This cell runs before the cell that imports tensorflow.

import os

os.environ.update({
    'TF_GPU_ALLOCATOR': 'cuda_malloc_async',
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
})

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix

import optuna
import mlflow
import dagshub

# Seed every random source in one call. Keras documents set_random_seed as
# seeding Python's `random`, NumPy and the backend (TensorFlow); the previous
# pair of calls left Python's `random` module unseeded.
keras.utils.set_random_seed(42)

2025-11-26 23:48:48.534469: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# List the GPUs TensorFlow can see and turn on memory growth for each of them

gpus = tf.config.list_physical_devices('GPU')
print(f"GPUs disponibles: {len(gpus)}")

if len(gpus) > 0:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Reported instead of raised so the notebook keeps running
        print(f"Error al configurar GPU: {e}")
    else:
        # Only reached when every device was configured without error
        print(f"GPU configurada: {gpus[0].name}")

GPUs disponibles: 1
GPU configurada: /physical_device:GPU:0


In [4]:
# Initialise the DagsHub integration (with mlflow=True) and show the
# MLflow tracking URI that is in effect afterwards
dagshub.init(
    repo_owner='404brainnotfound-ai',
    repo_name='Tarea_5',
    mlflow=True,
)
print(f"URI de seguimiento: {mlflow.get_tracking_uri()}")

URI de seguimiento: https://dagshub.com/404brainnotfound-ai/Tarea_5.mlflow


In [5]:
# Read the CSV and discard the 'property_id' column in a single expression,
# so re-running the cell always starts again from the file on disk
df = pd.read_csv('global_house_purchase_dataset.csv').drop(columns='property_id')

# Quick overview: frame dimensions and how many rows each 'decision' value has
print(f"Forma: {df.shape}")
print(f"Clases: {df['decision'].value_counts().to_dict()}")

Forma: (200000, 24)
Clases: {0: 153932, 1: 46068}


In [6]:
# One-hot encode the categorical columns as integer indicator columns
columnas_categoricas = [
    'country',
    'city',
    'property_type',
    'furnishing_status',
]
df_codificado = pd.get_dummies(df, columns=columnas_categoricas, dtype=int)

# One column is subtracted from the total before reporting the feature count
print(f"Características después de codificar: {len(df_codificado.columns) - 1}")

Características después de codificar: 81


In [7]:
# Target vector and feature matrix as NumPy arrays
y = df_codificado['decision'].to_numpy()
X = df_codificado.drop(columns='decision').to_numpy()

# Min-max scale every feature column.
# NOTE(review): the scaler is fit on every row of X, so any subset later
# taken from X_escalado was scaled with statistics from the whole dataset.
escalador = MinMaxScaler().fit(X)
X_escalado = escalador.transform(X)

print(f"Forma de X: {X_escalado.shape}")
print(f"Forma de y: {y.shape}")

Forma de X: (200000, 81)
Forma de y: (200000,)


In [9]:
# 70/20/10 split (train / test / validation), stratified on the target.
# The first split is taken from the raw feature matrix X so that the scaler
# can be fit on the training rows only; fitting it on the full dataset would
# let held-out min/max statistics leak into preprocessing. Row assignment is
# the same as splitting the pre-scaled matrix (same y, same random_state).
X_entreno, X_temp, y_entreno, y_temp = train_test_split(X, y, test_size=0.30, random_state=42, stratify=y)

# Refit the scaler on the training partition and apply it to both partitions
escalador = MinMaxScaler().fit(X_entreno)
X_entreno = escalador.transform(X_entreno)
X_temp = escalador.transform(X_temp)

# One third of the remaining 30% becomes validation (10%), the rest test (20%)
X_prueba, X_validacion, y_prueba, y_validacion = train_test_split(X_temp, y_temp, test_size=1/3, random_state=42, stratify=y_temp)

print(f"Entreno: {len(X_entreno)} ({len(X_entreno)/len(X)*100:.1f}%)")
print(f"Prueba:  {len(X_prueba)} ({len(X_prueba)/len(X)*100:.1f}%)")
print(f"Validación: {len(X_validacion)} ({len(X_validacion)/len(X)*100:.1f}%)")

num_caracteristicas = X_entreno.shape[1]
print(f"\nNúmero de características: {num_caracteristicas}")

Entreno: 140000 (70.0%)
Prueba:  40000 (20.0%)
Validación: 20000 (10.0%)

Número de características: 81


In [10]:
def crear_modelo(trial):
    """Build and compile a dense binary classifier from an Optuna trial.

    The network has ``n_capas`` hidden blocks in total: one block of width
    ``unidades_1`` followed by ``n_capas - 1`` blocks of width ``unidades_2``.
    Every block is Dense(relu, L2 kernel regularisation) -> BatchNormalization
    -> Dropout. The output layer is a single sigmoid unit.

    The input width is read from the notebook-level ``num_caracteristicas``.

    Args:
        trial: Object exposing ``suggest_int`` / ``suggest_float``; the
            hyperparameters "n_capas", "unidades_1", "unidades_2", "dropout",
            "valor_l2" and "tasa_aprendizaje" are requested from it, in that
            order.

    Returns:
        A ``keras.Sequential`` model compiled with Adam, binary cross-entropy
        loss and the accuracy metric.
    """
    # Hyperparameters sampled from the trial
    n_capas = trial.suggest_int("n_capas", 2, 4)
    unidades_1 = trial.suggest_int("unidades_1", 128, 512, step=128)
    unidades_2 = trial.suggest_int("unidades_2", 64, 256, step=64)
    dropout = trial.suggest_float("dropout", 0.2, 0.5)
    valor_l2 = trial.suggest_float("valor_l2", 1e-6, 1e-3, log=True)
    tasa_aprendizaje = trial.suggest_float("tasa_aprendizaje", 1e-5, 1e-3, log=True)

    modelo = keras.Sequential()

    # Declare the input explicitly instead of passing `input_shape=` to the
    # first Dense layer, which is the legacy way to do it in Sequential models
    modelo.add(keras.Input(shape=(num_caracteristicas,)))

    # First hidden block
    modelo.add(layers.Dense(unidades_1, activation='relu',
                            kernel_regularizer=regularizers.l2(valor_l2)))
    modelo.add(layers.BatchNormalization())
    modelo.add(layers.Dropout(dropout))

    # Remaining hidden blocks, all sharing the same width
    for _ in range(n_capas - 1):
        modelo.add(layers.Dense(unidades_2, activation='relu',
                                kernel_regularizer=regularizers.l2(valor_l2)))
        modelo.add(layers.BatchNormalization())
        modelo.add(layers.Dropout(dropout))

    # Output layer: one sigmoid unit for the binary target
    modelo.add(layers.Dense(1, activation='sigmoid'))

    optimizador = keras.optimizers.Adam(learning_rate=tasa_aprendizaje)
    modelo.compile(optimizer=optimizador, loss='binary_crossentropy', metrics=['accuracy'])

    return modelo