In [1]:
# Load the dataset from the CSV file into a DataFrame
import pandas as pd
data = pd.read_csv(filepath_or_buffer='DatosSingapore.csv')

In [3]:
import numpy as np

# Detect binary columns: those whose non-null values are all in {0, 1}
binary_vars = [col for col in data.columns if 
               set(data[col].dropna().unique()).issubset({0,1})]

# All numeric-dtype columns. Defined in this cell so it does not depend on a
# variable created by a later (or deleted) cell when the notebook is run
# top to bottom on a fresh kernel.
numerical_vars = data.select_dtypes(include=['number']).columns

# True (non-binary) numeric variables
numerical_true = [col for col in numerical_vars if col not in binary_vars]

print("Columnas binarias:", binary_vars)
print("Numéricas reales:", numerical_true)


Columnas binarias: ['host_is_superhost', 'host_has_profile_pic', 'host_identity_verified', 'instant_bookable', 'neighbourhood_cleansed:Ang Mo Kio', 'neighbourhood_cleansed:Bedok', 'neighbourhood_cleansed:Bishan', 'neighbourhood_cleansed:Bukit Batok', 'neighbourhood_cleansed:Bukit Merah', 'neighbourhood_cleansed:Bukit Panjang', 'neighbourhood_cleansed:Bukit Timah', 'neighbourhood_cleansed:Central Water Catchment', 'neighbourhood_cleansed:Changi', 'neighbourhood_cleansed:Choa Chu Kang', 'neighbourhood_cleansed:Clementi', 'neighbourhood_cleansed:Downtown Core', 'neighbourhood_cleansed:Geylang', 'neighbourhood_cleansed:Hougang', 'neighbourhood_cleansed:Jurong East', 'neighbourhood_cleansed:Jurong West', 'neighbourhood_cleansed:Kallang', 'neighbourhood_cleansed:Marina South', 'neighbourhood_cleansed:Marine Parade', 'neighbourhood_cleansed:Museum', 'neighbourhood_cleansed:Newton', 'neighbourhood_cleansed:Novena', 'neighbourhood_cleansed:Orchard', 'neighbourhood_cleansed:Outram', 'neighbourho

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Name of the target column (adjust if yours is different). It is kept out of
# every feature list below so the model never receives the value it must predict.
target_col = 'price'

# 1. Detect binary columns: non-null values all in {0, 1}
binary_vars = [col for col in data.columns
               if col != target_col
               and set(data[col].dropna().unique()).issubset({0, 1})]

# 2. Detect true (non-binary) numeric columns, excluding the target
numerical_vars = data.select_dtypes(include=['number']).columns
numerical_true = [col for col in numerical_vars
                  if col not in binary_vars and col != target_col]

# 3. Feature matrix
X = data[binary_vars + numerical_true]

# 4. Target variable
y = data[target_col]

# 5. Train/test split FIRST, so the scaler statistics below are computed
#    without looking at the test rows. Copies avoid writing into slices of X.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = X_train.copy()
X_test = X_test.copy()

# 6. Scale only the continuous numeric columns: fit on train, apply to both.
#    X_scaled (the whole feature matrix, scaled with the train-fitted scaler)
#    is kept because the original cell exposed it.
scaler = StandardScaler()
X_scaled = X.copy()
if numerical_true:
    X_train[numerical_true] = scaler.fit_transform(X_train[numerical_true])
    X_test[numerical_true] = scaler.transform(X_test[numerical_true])
    X_scaled[numerical_true] = scaler.transform(X[numerical_true])


In [5]:
import tensorflow as tf
from tensorflow import keras

def create_model(n_neurons=64, n_layers=2, lr=0.001, dropout_rate=0.0, input_dim=None):
    """Build and compile a fully connected Keras regression network.

    Args:
        n_neurons: Number of units in each hidden Dense layer.
        n_layers: Number of hidden Dense layers.
        lr: Learning rate passed to the Adam optimizer.
        dropout_rate: If greater than 0, a Dropout layer with this rate is
            added after every hidden layer.
        input_dim: Number of input features. When None (the default) it is
            read from the notebook-level ``X_train`` as ``X_train.shape[1]``,
            which is what this function always did before the parameter existed.

    Returns:
        A compiled ``keras.Sequential`` model with a single linear output unit,
        mean squared error loss and mean absolute error as a metric.
    """
    if input_dim is None:
        # Backward-compatible fallback to the global training matrix.
        input_dim = X_train.shape[1]

    model = keras.Sequential()

    # Input layer
    model.add(keras.layers.Input(shape=(input_dim,)))

    # Hidden layers
    for _ in range(n_layers):
        model.add(keras.layers.Dense(n_neurons, activation="relu"))
        if dropout_rate > 0:
            model.add(keras.layers.Dropout(dropout_rate))

    # Output layer for regression
    model.add(keras.layers.Dense(1, activation="linear"))

    # Compile
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss="mean_squared_error",
        metrics=["mean_absolute_error"]
    )

    return model


In [None]:
import mlflow
import mlflow.tensorflow
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math

# Select the MLflow experiment that subsequent runs are recorded under.
mlflow.set_experiment("Airbnb_price_regression")

def train_and_log(params):
    """Train one model configuration and record it as an MLflow run.

    Args:
        params: Dict with the required keys ``"n_neurons"``, ``"n_layers"``,
            ``"lr"`` and ``"dropout"``. Optional keys ``"epochs"``
            (default 50) and ``"batch_size"`` (default 32) control training.

    Returns:
        Dict ``{"rmse": ..., "mae": ...}`` with the metrics computed on the
        notebook-level ``X_test`` / ``y_test``.

    Raises:
        KeyError: If one of the required keys is missing from ``params``.

    NOTE(review): the reported metrics come from the test split; if they are
    also used to pick the best configuration, the winner's score is an
    optimistic estimate — consider selecting on the logged validation metrics.
    """
    n_neurons = params["n_neurons"]
    n_layers = params["n_layers"]
    lr = params["lr"]
    dropout = params["dropout"]
    epochs = params.get("epochs", 50)
    batch_size = params.get("batch_size", 32)

    with mlflow.start_run():

        # Log the effective hyperparameters, including defaulted ones
        mlflow.log_params({**params, "epochs": epochs, "batch_size": batch_size})

        # Build the model
        model = create_model(
            n_neurons=n_neurons,
            n_layers=n_layers,
            lr=lr,
            dropout_rate=dropout
        )

        # Training (20% of the training rows are held out for validation)
        history = model.fit(
            X_train, y_train,
            validation_split=0.2,
            epochs=epochs,
            batch_size=batch_size,
            verbose=0
        )

        # Log the per-epoch training/validation curves that fit() recorded,
        # so they are not thrown away.
        for epoch in range(len(history.epoch)):
            mlflow.log_metrics(
                {name: float(values[epoch]) for name, values in history.history.items()},
                step=epoch
            )

        # Prediction
        preds = model.predict(X_test).flatten()

        # Metrics
        rmse = math.sqrt(mean_squared_error(y_test, preds))
        mae = mean_absolute_error(y_test, preds)

        # Log metrics
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mae", mae)

        # Save the model as a run artifact
        mlflow.tensorflow.log_model(model, "model")

        print(f"✔ Run logged — RMSE: {rmse:.2f}, MAE: {mae:.2f}")

    return {"rmse": rmse, "mae": mae}


In [None]:
# Hyperparameter configurations to try, one dict per configuration.
# Each tuple below is (n_neurons, n_layers, lr, dropout).
search_space = [
    dict(zip(("n_neurons", "n_layers", "lr", "dropout"), combo))
    for combo in (
        (32, 2, 0.001, 0.0),
        (64, 2, 0.001, 0.2),
        (128, 3, 0.0005, 0.3),
        (256, 3, 0.0001, 0.4),
    )
]


In [None]:
# Train and log every configuration in search_space, in list order.
# `params` remains bound to the last configuration after the loop finishes.
for params in search_space:
    train_and_log(params)
