## Cargar Datos

In [None]:
# Read the CSV file into `data`, the DataFrame the following cells build
# their features and target from. The path is relative to the notebook's
# working directory.
import pandas as pd
data = pd.read_csv(filepath_or_buffer='DatosSingapore2.csv')

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from tensorflow import keras
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

# 1) FEATURES TO USE
# Numeric predictors, selected from `data` by exact column name.
num_vars = [
    "accommodates",
    "number_of_private_bathrooms",
    "number_of_shared_bathrooms",
    "bedrooms",
    "beds",
    "minimum_nights",
    "review_scores_rating",
    "review_scores_communication",
    "review_scores_location",
    "estimated_response_time_hours",
    "host_acceptance_rate",
    "host_response_rate"
]

# Categorical variables, identified by column name / column-name prefix.
cat_prefixes = ["property_type", "room_type"]

# 2) ONE-HOT CATEGORICAL HANDLING
# Columns that are already one-hot encoded are recognised by the
# "<prefix>:" naming pattern.
onehot_markers = tuple(f"{prefix}:" for prefix in cat_prefixes)
existing_cat_cols = [name for name in data.columns if name.startswith(onehot_markers)]

if existing_cat_cols:
    # Pre-encoded indicator columns exist: reuse them unchanged.
    cat_df = data[existing_cat_cols].copy()
else:
    # Otherwise encode whichever raw categorical columns are present.
    to_dummy = [prefix for prefix in cat_prefixes if prefix in data.columns]
    if to_dummy:
        # Values are cast to str first, so missing values become their own level.
        cat_df = pd.get_dummies(data[to_dummy].astype(str), prefix=to_dummy)
    else:
        # No categorical information at all: empty frame aligned to `data`.
        cat_df = pd.DataFrame(index=data.index)

# 3) NUMERIC FEATURES
num_df = data[num_vars].copy()

# 4) COMBINE FEATURES (numeric columns first, then categorical indicators)
X = pd.concat([num_df, cat_df], axis="columns")

# 5) DETECT BINARY COLUMNS SO THAT ONLY CONTINUOUS ONES ARE SCALED
# A column is treated as binary when all of its non-null values are in {0, 1}
# (boolean dummy columns qualify because True == 1 and False == 0).
binary_vars = [col for col in X.columns if set(X[col].dropna().unique()).issubset({0, 1})]

# Numeric variables that are genuinely continuous and therefore get standardised.
numerical_true = [col for col in num_vars if col not in binary_vars]

# 6) TARGET AND SPLIT
y = data["price"].astype("float32")

# Split BEFORE fitting the scaler: fitting StandardScaler on the full dataset
# would let the mean/variance of the test rows leak into the training features.
# test_size and random_state are unchanged from the previous version.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 7) SCALE CONTINUOUS COLUMNS USING TRAINING STATISTICS ONLY
# NOTE(review): no imputation happens in this cell; confirm the selected
# columns contain no missing values before training.
scaler = StandardScaler()
X_train = X_train_raw.copy()
X_test = X_test_raw.copy()
# `X_scaled` (all rows) is kept for backward compatibility with cells that may
# reference it; it is transformed with the train-fitted scaler.
X_scaled = X.copy()
if numerical_true:
    X_train[numerical_true] = scaler.fit_transform(X_train_raw[numerical_true])
    X_test[numerical_true] = scaler.transform(X_test_raw[numerical_true])
    X_scaled[numerical_true] = scaler.transform(X[numerical_true])

# Cast everything (including boolean/int indicator columns) to float32.
X_train = X_train.astype("float32")
X_test = X_test.astype("float32")
X_scaled = X_scaled.astype("float32")

input_dim = X_train.shape[1]
print(f"Total variables usadas: {input_dim}")

## Modelo Final

In [None]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import math

# Seed TensorFlow so weight initialisation and batch shuffling are repeatable
# across "Restart & Run All" executions.
SEED = 42
tf.random.set_seed(SEED)

# 1. Build the model with L2 regularisation
model = keras.Sequential()

# Input layer: one unit per feature column of X_train
model.add(keras.layers.Input(shape=(X_train.shape[1],)))

# Hidden layers (4 layers, 128 units each, ReLU activation, L2 weight penalty)
for _ in range(4):
    model.add(keras.layers.Dense(
        128,
        activation="relu",
        kernel_regularizer=keras.regularizers.l2(0.0005)  # L2 regularisation
    ))

# Output layer (ReLU activation, so predictions are never negative)
# NOTE(review): a ReLU output unit stops receiving gradient if its
# pre-activation becomes negative for all samples; consider "linear" or
# "softplus" if training ever collapses to constant zero predictions.
model.add(keras.layers.Dense(1, activation="relu"))

# Compile: optimise MSE, additionally report MAE
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="mean_squared_error",
    metrics=["mean_absolute_error"]
)

# 2. Early stopping on validation loss; the best epoch's weights are restored
early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=20,
    restore_best_weights=True
)

# 3. Train the model
# Early stopping selects the final weights from the validation loss, so the
# validation data must not be the test set; otherwise any later metric on
# X_test / y_test is optimistically biased. A validation subset is carved out
# of the training data instead, leaving the test set untouched.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=SEED
)

history = model.fit(
    X_fit, y_fit,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,
    verbose=2,
    callbacks=[early_stop]
)

## Guardar el modelo Final

In [None]:
# Persist the trained network to an HDF5 file.
model.save(filepath="modelo_regresion_airbnb.h5")
print("✔ Modelo guardado como modelo_regresion_airbnb.h5")

# Persist the fitted scaler (the object bound to `scaler`) next to the model
# so the same feature scaling can be applied again later.
import joblib

joblib.dump(value=scaler, filename="scaler_regresion.pkl")
print("✔ Escalador guardado como scaler_regresion.pkl")