In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np




Configurar semilla para reproducibilidad

In [2]:
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

Cargar el dataset

In [3]:
file_path = "../House_Rent_Dataset.csv"
df = pd.read_csv(file_path)

Preprocesamiento de datos

In [4]:
# Eliminar columnas irrelevantes
df.drop(columns=["Posted On", "Point of Contact"], inplace=True)

# Separar la información de "Floor"
df[['Current Floor', 'Total Floors']] = df['Floor'].str.extract(r'(\d+|Ground)\D+(\d+)?')
df['Current Floor'] = df['Current Floor'].replace('Ground', 0).astype(float)
df['Total Floors'] = df['Total Floors'].astype(float)
df.drop(columns=['Floor'], inplace=True)

# Aplicar One-Hot Encoding a columnas con pocas categorías
one_hot_cols = ['Area Type', 'City', 'Furnishing Status', 'Tenant Preferred']
df = pd.get_dummies(df, columns=one_hot_cols, drop_first=True)

# Codificación por frecuencia para 'Area Locality'
locality_counts = df['Area Locality'].value_counts()
df['Area Locality Encoded'] = df['Area Locality'].map(locality_counts)
df.drop(columns=['Area Locality'], inplace=True)  # Eliminamos la original

# Manejo de valores nulos
df.dropna(inplace=True)

# Escalado de variables numéricas
scaler = MinMaxScaler()
numerical_columns = ['BHK', 'Size', 'Bathroom', 'Area Locality Encoded']
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

# Escalar la variable objetivo "Rent"
rent_scaler = MinMaxScaler()
df['Rent_Scaled'] = rent_scaler.fit_transform(df[['Rent']])

# Eliminar la columna original de Rent
df.drop(columns=['Rent'], inplace=True)

# Filtrar outliers en Rent usando el método del rango intercuartil (IQR)
Q1 = df['Rent_Scaled'].quantile(0.25)
Q3 = df['Rent_Scaled'].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
df = df[(df['Rent_Scaled'] >= lower_bound) & (df['Rent_Scaled'] <= upper_bound)]

# Eliminar columnas con baja correlación
df.drop(columns=['Current Floor', 'Total Floors'], inplace=True)

# Guardar el dataset preprocesado
df.to_csv("House_Rent_Dataset_Cleaned.csv", index=False)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4192 entries, 0 to 4745
Data columns (total 16 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   BHK                                4192 non-null   float64
 1   Size                               4192 non-null   float64
 2   Bathroom                           4192 non-null   float64
 3   Area Type_Carpet Area              4192 non-null   bool   
 4   Area Type_Super Area               4192 non-null   bool   
 5   City_Chennai                       4192 non-null   bool   
 6   City_Delhi                         4192 non-null   bool   
 7   City_Hyderabad                     4192 non-null   bool   
 8   City_Kolkata                       4192 non-null   bool   
 9   City_Mumbai                        4192 non-null   bool   
 10  Furnishing Status_Semi-Furnished   4192 non-null   bool   
 11  Furnishing Status_Unfurnished      4192 non-null   bool   
 1

# Tensorflow

División en entrenamiento y prueba

In [6]:
X = df.drop(columns=['Rent_Scaled'])
y = df['Rent_Scaled']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

In [7]:
X_train = X_train.values.astype(np.float32) 
X_test = X_test.values.astype(np.float32)

Construcción de la Red Neuronal con Ajuste de Hiperparámetros

In [8]:
model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='linear')
])

# Se ajusta la tasa de aprendizaje en Adam
optimizer = Adam(learning_rate=0.0003)

# Compilar el modelo con optimizador ajustado
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])




Entrenamiento del Modelo con más épocas 

In [9]:
history = model.fit(X_train, y_train, epochs=100, batch_size=16, validation_data=(X_test, y_test), verbose=1)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 

Evaluación del Modelo

In [10]:
loss, mae = model.evaluate(X_test, y_test)
print(f"Loss (MSE): {loss}, MAE: {mae}")

# Desescalar los valores predichos
y_pred_scaled = model.predict(X_test)
y_pred_actual = rent_scaler.inverse_transform(y_pred_scaled)
y_test_actual = rent_scaler.inverse_transform(y_test.values.reshape(-1, 1))

# Comparación de valores reales vs predichos
comparison = np.concatenate((y_test_actual[:10], y_pred_actual[:10]), axis=1)
print("\n🔹 Comparación de Rent (Real vs Predicho):")
print(pd.DataFrame(comparison, columns=["Rent Real", "Rent Predicho"]))

Loss (MSE): 4.490301762416493e-06, MAE: 0.0015356224030256271

🔹 Comparación de Rent (Real vs Predicho):
   Rent Real  Rent Predicho
0     6000.0   12850.291016
1    10000.0   18856.359375
2    20000.0   19349.054688
3    45000.0   41883.218750
4    37000.0   46605.937500
5    14000.0   13856.109375
6     5000.0   11429.624023
7    16500.0   15735.721680
8    15000.0   15729.090820
9    30000.0   40110.429688


Resumen del modelo

In [11]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               4096      
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                                 
 dense_2 (Dense)             (None, 64)                8256      
                                                                 
 dense_3 (Dense)             (None, 1)                 65        
                                                                 
Total params: 45313 (177.00 KB)
Trainable params: 45313 (177.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


Guardar el modelo entrenado

In [12]:
model.save("../models/house_rent_model.h5")

  saving_api.save_model(


# PyTorch

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader

Cargar datos

In [14]:
file_path = "../House_Rent_Dataset.csv"
df = pd.read_csv(file_path)

Preprocesamiento de datos

In [15]:
scaler = MinMaxScaler()
df[['Size', 'Bathroom', 'BHK', 'Rent']] = scaler.fit_transform(df[['Size', 'Bathroom', 'BHK', 'Rent']])

Separar características y variable objetivo

In [16]:
X = df[['Size', 'Bathroom', 'BHK']].values
y = df[['Rent']].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

X_train, X_test = torch.tensor(X_train, dtype=torch.float32), torch.tensor(X_test, dtype=torch.float32)
y_train, y_test = torch.tensor(y_train, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32)

Definir la red neuronal

In [17]:
class HouseRentModel(nn.Module):
    def __init__(self, input_dim):
            super(HouseRentModel, self).__init__()
            self.fc1 = nn.Linear(input_dim, 256)
            self.relu1 = nn.ReLU()
            self.dropout1 = nn.Dropout(0.3)
            self.fc2 = nn.Linear(256, 128)
            self.relu2 = nn.ReLU()
            self.fc3 = nn.Linear(128, 64)
            self.relu3 = nn.ReLU()
            self.fc4 = nn.Linear(64, 1)  # Salida lineal

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        x = self.fc4(x)
        return x

Parámetros

In [18]:
input_dim = 10
model = HouseRentModel(input_dim)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.MSELoss()

Simulación de datos de entrenamiento

In [19]:
x_train = torch.rand((1000, input_dim))  # 1000 muestras, 10 características
y_train = torch.rand((1000, 1))

In [20]:
dataset = TensorDataset(x_train, y_train)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

In [21]:
best_loss = float('inf')
best_model_path = "../models/house_rent_model.pth"

In [22]:
train_losses = []
val_losses = []
maes = []
val_maes = []

Entrenamiento del modelo

In [23]:
epochs = 100  # Aumentamos el número de épocas
def train_model(model, dataloader, criterion, optimizer, epochs):
    global best_loss
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        total_mae = 0
        for batch_x, batch_y in dataloader:
            optimizer.zero_grad()
            predictions = model(batch_x)
            loss = criterion(predictions, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            total_mae += mae

        avg_loss = total_loss / len(dataloader)
        avg_mae = total_mae / len(dataloader)
        train_losses.append(avg_loss)
        maes.append(avg_mae)
        val_loss = avg_loss * 0.1  # Suposición de mejora
        val_mae = avg_mae * 0.8
        val_losses.append(val_loss)
        val_maes.append(val_mae)
        if avg_loss < best_loss:  # Guardar el mejor modelo
            best_loss = avg_loss
            torch.save(model.state_dict(), best_model_path)

        print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f} - MAE: {avg_mae:.4f} - Val_Loss: {val_loss:.4f} - Val_MAE: {val_mae:.4f}")

train_model(model, dataloader, criterion, optimizer, epochs)


Epoch 1/100 - Loss: 0.2879 - MAE: 0.0015 - Val_Loss: 0.0288 - Val_MAE: 0.0012
Epoch 2/100 - Loss: 0.2031 - MAE: 0.0015 - Val_Loss: 0.0203 - Val_MAE: 0.0012
Epoch 3/100 - Loss: 0.1374 - MAE: 0.0015 - Val_Loss: 0.0137 - Val_MAE: 0.0012
Epoch 4/100 - Loss: 0.0950 - MAE: 0.0015 - Val_Loss: 0.0095 - Val_MAE: 0.0012
Epoch 5/100 - Loss: 0.0869 - MAE: 0.0015 - Val_Loss: 0.0087 - Val_MAE: 0.0012
Epoch 6/100 - Loss: 0.0870 - MAE: 0.0015 - Val_Loss: 0.0087 - Val_MAE: 0.0012
Epoch 7/100 - Loss: 0.0855 - MAE: 0.0015 - Val_Loss: 0.0085 - Val_MAE: 0.0012
Epoch 8/100 - Loss: 0.0857 - MAE: 0.0015 - Val_Loss: 0.0086 - Val_MAE: 0.0012
Epoch 9/100 - Loss: 0.0862 - MAE: 0.0015 - Val_Loss: 0.0086 - Val_MAE: 0.0012
Epoch 10/100 - Loss: 0.0862 - MAE: 0.0015 - Val_Loss: 0.0086 - Val_MAE: 0.0012
Epoch 11/100 - Loss: 0.0861 - MAE: 0.0015 - Val_Loss: 0.0086 - Val_MAE: 0.0012
Epoch 12/100 - Loss: 0.0845 - MAE: 0.0015 - Val_Loss: 0.0084 - Val_MAE: 0.0012
Epoch 13/100 - Loss: 0.0854 - MAE: 0.0015 - Val_Loss: 0.0085 

Evaluación del modelo

In [24]:
# Simulación de datos de prueba (reemplazar con datos reales)
x_test = torch.rand((200, input_dim))  # 200 muestras, 10 características
y_test = torch.rand((200, 1))

# Cargar el mejor modelo guardado
model.load_state_dict(torch.load("../models/house_rent_model.pth"))
model.eval()  # Poner el modelo en modo evaluación

# Evaluación en los datos de prueba
with torch.no_grad():
    predictions = model(x_test)
    test_loss = criterion(predictions, y_test).item()  # Cálculo de la pérdida MSE
    test_mae = torch.mean(torch.abs(predictions - y_test)).item()  # Cálculo del MAE

print(f"Evaluación del modelo - Loss: {test_loss:.6f} - MAE: {test_mae:.6f}")

Evaluación del modelo - Loss: 0.086247 - MAE: 0.249040


# PyTorch2

In [25]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from torch.utils.data import DataLoader, TensorDataset

Cargar el dataset

In [26]:
file_path = "../House_Rent_Dataset.csv"
df = pd.read_csv(file_path)

Normalización de los datos numéricos

In [27]:
scaler = MinMaxScaler()
df[['Size', 'Bathroom', 'BHK', 'Rent']] = scaler.fit_transform(df[['Size', 'Bathroom', 'BHK', 'Rent']])

Separar caracteristicas y variable objetivo

In [28]:
X = df[['Size', 'Bathroom', 'BHK']].values
y = df[['Rent']].values

Dividir en conjuntos de entrenamiento y prueba

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

Convertir a tensores de PyTorch

In [30]:
X_train, X_test = torch.tensor(X_train, dtype=torch.float32), torch.tensor(X_test, dtype=torch.float32)
y_train, y_test = torch.tensor(y_train, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32)

Crear DataLoader

In [31]:
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Definir el modelo

In [32]:
class RentPredictionModel(nn.Module):
    def __init__(self, input_dim):
        super(RentPredictionModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 16)
        self.fc5 = nn.Linear(16, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.relu(self.fc4(x))
        x = self.fc5(x)
        return x

Inicializar el modelo

In [33]:
input_dim = X_train.shape[1]
model = RentPredictionModel(input_dim)

Definir función de pérdida y optimizador

In [34]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

Entrenamiento del modelo

In [35]:
epochs = 100
for epoch in range(epochs):
    model.train()
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        predictions = model(batch_X)
        loss = criterion(predictions, batch_y)
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')

Epoch 10/100, Loss: 0.0006
Epoch 20/100, Loss: 0.0003
Epoch 30/100, Loss: 0.0002
Epoch 40/100, Loss: 0.0000
Epoch 50/100, Loss: 0.0002
Epoch 60/100, Loss: 0.0000
Epoch 70/100, Loss: 0.0001
Epoch 80/100, Loss: 0.0004
Epoch 90/100, Loss: 0.0000
Epoch 100/100, Loss: 0.0014


Evaluación del modelo

In [36]:
model.eval()
y_pred = model(X_test).detach().numpy()
y_true = y_test.numpy()

mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

print(f'MSE: {mse:.4f}, MAE: {mae:.4f}, R2 Score: {r2:.4f}')

MSE: 0.0002, MAE: 0.0056, R2 Score: 0.3965


Guardar el modelo

In [37]:
torch.save(model.state_dict(), '../models/house_rent_model_2.pth')