# Red Neuronal para Predicción de Precios de Arriendo

En este notebook, entrenaremos un modelo de red neuronal para predecir los precios de arriendo de propiedades residenciales en los municipios del Valle de Aburrá, Antioquia. Utilizaremos los datos preprocesados que se encuentran en la carpeta `data/processed/`.

In [None]:
# Importar librerías necesarias
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
import keras_tuner as kt
import tensorflow as tf
from tensorflow.keras.optimizers import Adam, RMSprop, SGD, Adadelta, Nadam



In [None]:
# Búsqueda de hiperparámetros para ajustar el mejor modelo.
# Se varían el número de capas ocultas, el número de neuronas por capa, la función de activación, la tasa de dropout, el optimizador y la tasa de aprendizaje.

In [None]:
### Hiperparámetros optimizados con Keras Tuner (Hyperband)


# Cargar datos
def load_data(filepath):
    """Read the Excel workbook at ``filepath`` and return it as a DataFrame.

    Args:
        filepath: Path to the Excel file, passed straight to ``pd.read_excel``.

    Returns:
        The DataFrame produced by ``pd.read_excel``.
    """
    return pd.read_excel(filepath)

# Procesamiento de datos
def preprocess_data(df):
    """Build the feature preprocessor and produce a scaled train/test split.

    The target column is ``'precio'``. A fixed list of columns is treated as
    categorical (one-hot encoded); every remaining non-target column is
    treated as numerical (standardized).

    Args:
        df: DataFrame containing the target column, the categorical columns
            listed below and any number of numerical columns.

    Returns:
        A tuple ``(preprocessor, target_scaler, X_train, X_test, y_train,
        y_test)`` where ``preprocessor`` is an *unfitted* ``ColumnTransformer``,
        ``target_scaler`` is a ``StandardScaler`` fitted on the training
        targets, ``X_*`` are raw (untransformed) feature frames and ``y_*``
        are standardized targets of shape ``(n, 1)``.
    """
    target = 'precio'
    categorical_features = ['ciudad', 'antiguedad', 'comuna', 'zona', 'tipo_de_inmueble', 'estado']
    numerical_features = df.drop(columns=categorical_features + [target]).columns.tolist()

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
        ]
    )

    X = df.drop(columns=[target])
    y = df[target].values.reshape(-1, 1)

    # Split BEFORE scaling the target: fitting the scaler on the full column
    # would leak the test set's mean/variance into the training targets.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    target_scaler = StandardScaler()
    y_train = target_scaler.fit_transform(y_train)
    y_test = target_scaler.transform(y_test)

    return preprocessor, target_scaler, X_train, X_test, y_train, y_test

# Modelo de red neuronal optimizable
def build_model(hp, input_shape):
    """Build and compile a tunable feed-forward regression network.

    Args:
        hp: Hyperparameter container exposing ``Int``, ``Float`` and
            ``Choice`` (used here as a KerasTuner search space).
        input_shape: Number of input features; used as ``shape=(input_shape,)``.

    Returns:
        A compiled ``Sequential`` model with a single linear output unit,
        MSE loss and MAE as an extra metric.
    """
    model = Sequential()
    model.add(Input(shape=(input_shape,)))

    # Each hidden block is Dense -> BatchNormalization -> Dropout, with its
    # own width, activation and dropout rate.
    n_hidden = hp.Int('num_layers', 1, 4)
    for idx in range(n_hidden):
        width = hp.Int(f'units_{idx}', min_value=32, max_value=512, step=32)
        activation = hp.Choice(f'activation_{idx}', ['relu', 'tanh', 'sigmoid', 'leaky_relu'])
        model.add(Dense(units=width, activation=activation))
        model.add(BatchNormalization())
        drop_rate = hp.Float(f'dropout_{idx}', min_value=0.0, max_value=0.5, step=0.1)
        model.add(Dropout(rate=drop_rate))

    model.add(Dense(1))

    # Optimizer and learning-rate selection
    optimizer = hp.Choice('optimizer', values=['adam', 'rmsprop', 'sgd', 'adadelta'])
    learning_rate = hp.Float('learning_rate', min_value=0.0001, max_value=0.02, sampling='LOG')

    # Any name not listed here (i.e. 'adam') falls back to Adam.
    optimizer_classes = {'rmsprop': RMSprop, 'sgd': SGD, 'adadelta': Adadelta}
    opt = optimizer_classes.get(optimizer, Adam)(learning_rate=learning_rate)

    model.compile(optimizer=opt, loss='mse', metrics=['mae'])
    return model

# Optimización de hiperparámetros
def hyperparameter_tuning(X_train_transformed, y_train, input_shape):
    """Run a Hyperband search over ``build_model`` and return the best setting.

    Args:
        X_train_transformed: Already-preprocessed training features.
        y_train: Training targets.
        input_shape: Number of input features, forwarded to ``build_model``.

    Returns:
        The top-ranked hyperparameter set found by the tuner.
    """
    def model_builder(hp):
        # Bind the fixed input width so the tuner only has to supply `hp`.
        return build_model(hp, input_shape)

    searcher = kt.Hyperband(
        model_builder,
        objective='val_mae',
        max_epochs=20,
        factor=2,
        directory='hyperband',
        project_name='nn_tuning',
    )

    # Stops a trial once validation loss has not improved for 5 epochs.
    # NOTE(review): the tuner objective is 'val_mae' while early stopping
    # watches 'val_loss' (MSE) — confirm this mismatch is intended.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

    searcher.search(
        X_train_transformed,
        y_train,
        epochs=50,
        validation_split=0.2,
        callbacks=[early_stopping],
        verbose=1,
    )
    return searcher.get_best_hyperparameters(num_trials=1)[0]

# Entrenamiento de la red neuronal
def train_neural_network(X_train, X_test, y_train, y_test, preprocessor, target_scaler, best_hps):
    """Fit the tuned network and evaluate it on the test split.

    Args:
        X_train, X_test: Raw feature sets; the preprocessor is fitted on
            ``X_train`` and then applied to ``X_test``.
        y_train, y_test: Scaled targets (``y_test`` is inverse-transformed
            with ``target_scaler`` before evaluation).
        preprocessor: Transformer exposing ``fit_transform`` / ``transform``.
        target_scaler: Fitted scaler used to map targets back to their
            original scale.
        best_hps: Hyperparameters passed to ``build_model``.

    Returns:
        ``(model, predictions)`` where ``predictions`` is a 1-D array in the
        original target scale.
    """
    train_features = preprocessor.fit_transform(X_train)
    test_features = preprocessor.transform(X_test)

    model = build_model(best_hps, train_features.shape[1])
    model.fit(train_features, y_train, epochs=100, batch_size=32, validation_split=0.2, verbose=0)

    # Predict in scaled space, then map back to the original target scale
    scaled_predictions = model.predict(test_features).reshape(-1, 1)
    predictions = target_scaler.inverse_transform(scaled_predictions).flatten()
    actual_values = target_scaler.inverse_transform(y_test).flatten()

    # Print metrics and show the actual-vs-predicted plot
    evaluate_model(predictions, actual_values)

    return model, predictions

# Evaluación del modelo
def evaluate_model(predictions, y_test):
    """Print MAE, RMSE and R2 and show an actual-vs-predicted scatter plot.

    Args:
        predictions: 1-D array of predicted values.
        y_test: 1-D array of true values (same scale as ``predictions``).
    """
    mae = mean_absolute_error(y_test, predictions)
    mse = mean_squared_error(y_test, predictions)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, predictions)

    print(f"MAE: {mae}")
    print(f"RMSE: {rmse}")
    print(f"R2 Score: {r2}")

    # Dashed diagonal marks perfect predictions over the observed range
    lowest, highest = y_test.min(), y_test.max()

    plt.figure(figsize=(7, 4))
    sns.scatterplot(x=y_test, y=predictions, alpha=0.5)
    plt.plot([lowest, highest], [lowest, highest], 'k--', lw=2)
    plt.xlabel('Actual Prices')
    plt.ylabel('Predicted Prices')
    plt.title('Actual vs Predicted Prices')
    plt.show()

# Load and preprocess the data
filepath = '../data/processed/data_arriendos_model.xlsx'
df = load_data(filepath)
preprocessor, target_scaler, X_train, X_test, y_train, y_test = preprocess_data(df)

# Transform the training data for the hyperparameter search
# (the preprocessor is fitted on X_train here; train_neural_network fits it again)
X_train_transformed = preprocessor.fit_transform(X_train)

# Search for the best hyperparameters; the last argument is the number of input features
best_hps = hyperparameter_tuning(X_train_transformed, y_train, X_train_transformed.shape[1])
print("Mejores hiperparámetros encontrados:")
print(best_hps.values)

# Train the model with the best hyperparameters
nn_model, nn_predictions = train_neural_network(X_train, X_test, y_train, y_test, preprocessor, target_scaler, best_hps)

In [None]:
## Mejor modelo encontrado

###--------------------------------------------------###
# Cargar datos
def load_data(filepath):
    """Read the Excel file and replace ``precio`` with its ``log1p`` transform.

    Args:
        filepath: Path to the Excel file, passed straight to ``pd.read_excel``.

    Returns:
        DataFrame without the ``precio`` column and with a new trailing
        ``precio_log`` column holding ``log1p(precio)``.
    """
    frame = pd.read_excel(filepath)
    # pop() removes the raw price column and hands it over for the transform
    frame['precio_log'] = np.log1p(frame.pop('precio'))
    return frame

# Procesamiento de datos
def preprocess_data(df):
    """Build the feature preprocessor and produce a scaled train/test split.

    The target column is ``'precio_log'``. A fixed list of columns is treated
    as categorical (one-hot encoded); every remaining non-target column is
    treated as numerical (standardized).

    Args:
        df: DataFrame containing the target column, the categorical columns
            listed below and any number of numerical columns.

    Returns:
        A tuple ``(preprocessor, target_scaler, X_train, X_test, y_train,
        y_test)`` where ``preprocessor`` is an *unfitted* ``ColumnTransformer``,
        ``target_scaler`` is a ``StandardScaler`` fitted on the training
        targets, ``X_*`` are raw (untransformed) feature frames and ``y_*``
        are standardized targets of shape ``(n, 1)``.
    """
    target = 'precio_log'
    categorical_features = ['ciudad', 'antiguedad', 'comuna', 'zona', 'tipo_de_inmueble', 'estado']
    numerical_features = df.drop(columns=categorical_features + [target]).columns.tolist()

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
        ]
    )

    X = df.drop(columns=[target])
    y = df[target].values.reshape(-1, 1)

    # Split BEFORE scaling the target: fitting the scaler on the full column
    # would leak the test set's mean/variance into the training targets.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    target_scaler = StandardScaler()
    y_train = target_scaler.fit_transform(y_train)
    y_test = target_scaler.transform(y_test)

    return preprocessor, target_scaler, X_train, X_test, y_train, y_test

# Entrenamiento de la red neuronal
def train_neural_network(X_train, X_test, y_train, y_test, preprocessor, target_scaler):
    """Train the fixed-architecture network and evaluate it on the test split.

    Predictions and test targets are mapped back through ``target_scaler``
    and then through ``expm1`` before evaluation.

    Args:
        X_train, X_test: Raw feature sets; the preprocessor is fitted on
            ``X_train`` and then applied to ``X_test``.
        y_train, y_test: Scaled targets.
        preprocessor: Transformer exposing ``fit_transform`` / ``transform``.
        target_scaler: Fitted scaler used to undo the target scaling.

    Returns:
        ``(model, predictions, history)`` where ``predictions`` is a 1-D array
        after ``expm1`` and ``history`` is the object returned by ``model.fit``.
    """
    train_features = preprocessor.fit_transform(X_train)
    test_features = preprocessor.transform(X_test)

    # (units, activation, dropout rate) for each Dense -> BatchNorm -> Dropout block.
    # NOTE(review): the original comment said the third block was not required
    # by the tuned hyperparameters; it is kept because the original model used it.
    hidden_blocks = (
        (384, 'tanh', 0.1),
        (192, 'leaky_relu', 0.3),
        (320, 'sigmoid', 0.3),
    )

    layers = [Input(shape=(train_features.shape[1],))]
    for units, activation, dropout_rate in hidden_blocks:
        layers.extend([
            Dense(units, activation=activation),
            BatchNormalization(),
            Dropout(dropout_rate),
        ])
    layers.append(Dense(1))  # single linear output unit
    model = Sequential(layers)

    # RMSprop with the learning rate found by the hyperparameter search
    model.compile(
        optimizer=RMSprop(learning_rate=0.00012122536697831619),
        loss='mse',
        metrics=['mae'],
    )
    history = model.fit(
        train_features,
        y_train,
        epochs=200,
        batch_size=64,
        validation_split=0.2,
        verbose=0,
    )

    # Undo the target standardization for both predictions and ground truth
    scaled_predictions = model.predict(test_features).reshape(-1, 1)
    log_predictions = target_scaler.inverse_transform(scaled_predictions).flatten()
    log_actuals = target_scaler.inverse_transform(y_test).flatten()

    # Undo the log1p transform
    predictions = np.expm1(log_predictions)
    actual_values = np.expm1(log_actuals)

    # Print metrics and show the actual-vs-predicted plot
    evaluate_model(predictions, actual_values)

    return model, predictions, history

# Evaluación del modelo
def evaluate_model(predictions, y_test):
    """Print MAE, RMSE and R2 and show an actual-vs-predicted scatter plot.

    Args:
        predictions: 1-D array of predicted values.
        y_test: 1-D array of true values (same scale as ``predictions``).
    """
    mae = mean_absolute_error(y_test, predictions)
    mse = mean_squared_error(y_test, predictions)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, predictions)

    print(f"MAE: {mae}")
    print(f"RMSE: {rmse}")
    print(f"R2 Score: {r2}")

    # Dashed diagonal marks perfect predictions over the observed range
    lowest, highest = y_test.min(), y_test.max()

    plt.figure(figsize=(7, 4))
    sns.scatterplot(x=y_test, y=predictions, alpha=0.5)
    plt.plot([lowest, highest], [lowest, highest], 'k--', lw=2)
    # Plain tick labels instead of scientific notation
    plt.ticklabel_format(style='plain')
    plt.xlabel('Valores reales')
    plt.ylabel('Predicciones')
    plt.title('Valores reales vs Predicciones')
    plt.show()

# Load and preprocess the data (this cell's load_data adds the log-transformed target)
filepath = '../data/processed/data_arriendos_model.xlsx'
df = load_data(filepath)
preprocessor, target_scaler, X_train, X_test, y_train, y_test = preprocess_data(df)

# Train the model; `history` is kept for plotting the loss curves afterwards
nn_model_x, nn_predictions_x, history = train_neural_network(X_train, X_test, y_train, y_test, preprocessor, target_scaler)

In [None]:
## Función de pérdida
import matplotlib.pyplot as plt

def plot_loss_curve(history):
    """Plot the training and validation loss recorded during training.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the per-epoch ``'loss'`` and ``'val_loss'`` series (e.g. the
            value returned by Keras ``model.fit``).
    """
    curves = (
        ('loss', 'Pérdida de entrenamiento'),
        ('val_loss', 'Pérdida de validación'),
    )

    plt.figure(figsize=(8, 6))
    for key, legend_label in curves:
        plt.plot(history.history[key], label=legend_label)
    plt.xlabel('Épocas')
    plt.ylabel('Pérdida (MSE)')
    plt.title('Curva de Pérdida (Loss Curve)')
    plt.legend()
    plt.grid(True)
    plt.show()
# Plot the loss curves for the `history` returned by the training cell
plot_loss_curve(history)

In [None]:
## Model summary (layers, output shapes and parameter counts)
nn_model_x.summary()

In [None]:
# Detailed configuration of the model
nn_model_x.get_config()

In [None]:
# Model weights and non-trainable variables
# NOTE(review): when run as a notebook cell, presumably only the last
# expression is echoed, so the get_weights() result is discarded — confirm intent.
nn_model_x.get_weights()
nn_model_x.non_trainable_weights

In [None]:
# Diagram of the resulting neural network
from matplotlib.collections import LineCollection

# Network layout
# NOTE(review): the input width (98) is hard-coded; presumably it is the number
# of preprocessed features — confirm against nn_model_x before reusing.
layer_sizes = [98, 384, 192, 320, 1]  # Number of neurons per layer
layer_names = ["Input", "Dense 1", "Dense 2", "Dense 3", "Output"]

# Spacing
x_spacing = 4  # Horizontal distance between layers
y_spacing = 0.3  # Vertical extent allotted per neuron within a layer

# Horizontal position of each layer
x_positions = np.arange(len(layer_sizes)) * x_spacing

# Vertical neuron positions, computed once per layer. A single-neuron layer is
# centred at 0 (linspace with one sample would return only the lower bound).
layer_y_positions = [
    np.linspace(-size * y_spacing / 2, size * y_spacing / 2, size) if size > 1 else np.zeros(size)
    for size in layer_sizes
]

fig, ax = plt.subplots(figsize=(18, 12))

for i, (size, x) in enumerate(zip(layer_sizes, x_positions)):
    # Draw the neurons of this layer
    y_positions = layer_y_positions[i]
    ax.scatter([x] * size, y_positions, s=50, c='royalblue', label=layer_names[i] if i == 0 else "")

    # Draw every connection from the previous layer as ONE LineCollection
    # instead of one Line2D per connection (~173k artists for this network).
    if i > 0:
        prev_x = x_positions[i - 1]
        prev_y_positions = layer_y_positions[i - 1]
        start_y, end_y = np.meshgrid(prev_y_positions, y_positions, indexing="ij")
        starts = np.column_stack([np.full(start_y.size, prev_x), start_y.ravel()])
        ends = np.column_stack([np.full(end_y.size, x), end_y.ravel()])
        segments = np.stack([starts, ends], axis=1)  # shape: (n_connections, 2, 2)
        # zorder=2 matches the default for lines, keeping them above the scatter markers
        ax.add_collection(
            LineCollection(segments, colors="lightgray", linewidths=0.3, alpha=0.7, zorder=2)
        )

# Collections do not trigger autoscaling on their own
ax.autoscale_view()

# Layer labels
ax.set_xticks(x_positions)
ax.set_xticklabels(layer_names, fontsize=12)
ax.set_yticks([])  # Hide the Y axis
ax.set_title("Estructura de la Red Neuronal (Matplotlib Mejorado)", fontsize=14)

plt.show()