In [68]:
# --- Dataset artifacts: the train/test feature and target files ---
DATASET_DIR = "../dataset"
X_TRAIN_PATH = f"{DATASET_DIR}/X_train.pkl"
X_TEST_PATH = f"{DATASET_DIR}/X_test.pkl"
Y_TRAIN_PATH = f"{DATASET_DIR}/y_train.pkl"
Y_TEST_PATH = f"{DATASET_DIR}/y_test.pkl"

# --- MLflow tracking configuration ---
MLFLOW_TRACKING_URI = "../models/mlruns/"
MLFLOW_EXPERIMENT_NAME = "real-estate-price-prediction"

In [69]:
import os
import pandas as pd
import joblib
import numpy as np
import torch 
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import pandas as pd
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature


In [60]:
# Load the four pre-split artifacts in a fixed order: train features, test
# features, train targets, test targets.
# NOTE: joblib.load unpickles its input, so these paths must only ever point at
# trusted files.
X_train, X_test, y_train, y_test = (
    joblib.load(split_path)
    for split_path in (X_TRAIN_PATH, X_TEST_PATH, Y_TRAIN_PATH, Y_TEST_PATH)
)


In [61]:
# Sanity check: display the shape of every split (train/test features, then train/test targets).
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((14839, 285), (3710, 285), (14839,), (3710,))

In [72]:
# n: number of samples in the test targets; p: number of columns in the test features.
n, p = len(y_test), X_test.shape[1]

In [73]:
def adj_r2_score(y_test, y_pred, n_features=None):
    """Return the adjusted R² of ``y_pred`` against ``y_test``.

    Adjusted R² = 1 - (1 - R²) * (n - 1) / (n - k - 1), where ``n`` is the
    number of scored samples and ``k`` the number of predictors.

    Args:
        y_test: Ground-truth targets (anything ``r2_score`` accepts that also
            supports ``len``).
        y_pred: Predicted targets, one per entry of ``y_test``.
        n_features: Number of predictors the model was fitted on. When omitted,
            the notebook-level ``p`` is used, so existing two-argument calls
            keep working.

    Returns:
        The adjusted R² as a float.

    Raises:
        ValueError: If ``n - k - 1`` is not positive, in which case the
            statistic is undefined.
    """
    # Count the samples that are actually being scored rather than reading a
    # notebook-level value, which is only right for one particular split.
    n_samples = len(y_test)
    k = p if n_features is None else n_features

    dof = n_samples - k - 1
    if dof <= 0:
        raise ValueError(
            f"adjusted R² needs more than {k + 1} samples for {k} features, got {n_samples}"
        )

    r2 = r2_score(y_test, y_pred)
    return 1 - ((1 - r2) * (n_samples - 1)) / dof

In [None]:
# Point MLflow at the configured tracking location and make the named
# experiment the active one, so the run started further down is recorded there.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

# Example categorical / numerical columns
# NOTE(review): neither list is referenced anywhere else in the visible cells —
# confirm they are still needed before keeping them.
cat_cols = ['type', 'available_from', 'city']
num_cols = ['size_sqm', 'bedrooms_num', 'bathrooms_num']

# ----------------------------
# 1️⃣ Convert to float tensors safely
# ----------------------------
def to_tensor(data):
    """Return ``data`` as a float32 torch tensor.

    Tensors are cast with ``.float()``. Objects exposing a ``.values``
    attribute (pandas DataFrame / Series) are unwrapped to that attribute
    first; anything else is handed to NumPy directly.
    """
    # Already a tensor: only the dtype may need changing.
    if isinstance(data, torch.Tensor):
        return data.float()

    # Unwrap pandas containers, then funnel everything through one float32
    # NumPy conversion before handing the buffer to torch.
    raw = data.values if hasattr(data, 'values') else data
    as_float32 = np.array(raw, dtype=np.float32)
    return torch.from_numpy(as_float32)

# Swap every split for its float32 tensor form. Targets are reshaped into
# single-column (N, 1) tensors via view(-1, 1).
X_train, X_test = to_tensor(X_train), to_tensor(X_test)
y_train, y_test = (to_tensor(target).view(-1, 1) for target in (y_train, y_test))

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# ----------------------------
# 2️⃣ Define regularized model
# ----------------------------
class AdvancedRegressor(nn.Module):
    """Fully connected regressor with batch-norm and dropout regularisation.

    The network is a stack of ``Linear -> BatchNorm1d -> ReLU -> Dropout``
    stages, one per entry of ``hidden_dims``, followed by a ``Linear`` head
    with a single output.

    Args:
        input_dim: Number of input features.
        hidden_dims: Width of each hidden stage, in order. The default
            reproduces the original 512-256-128-64 layout.
        dropouts: Dropout probability applied at the end of each hidden stage;
            must contain one value per entry of ``hidden_dims``.

    Raises:
        ValueError: If ``hidden_dims`` and ``dropouts`` differ in length.
    """

    def __init__(self, input_dim, hidden_dims=(512, 256, 128, 64),
                 dropouts=(0.3, 0.2, 0.2, 0.1)):
        super().__init__()
        if len(hidden_dims) != len(dropouts):
            raise ValueError(
                f"hidden_dims has {len(hidden_dims)} entries but dropouts has {len(dropouts)}"
            )

        # Layers are appended in the same order as a hand-written Sequential,
        # so sub-module indices (and therefore state_dict keys) are unchanged
        # for the default configuration.
        layers = []
        in_features = input_dim
        for width, drop_p in zip(hidden_dims, dropouts):
            layers += [
                nn.Linear(in_features, width),
                nn.BatchNorm1d(width),
                nn.ReLU(),
                nn.Dropout(drop_p),
            ]
            in_features = width
        layers.append(nn.Linear(in_features, 1))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stack and return the ``(batch, 1)`` output of the head."""
        return self.net(x)

# ----------------------------
# 3️⃣ Initialize model
# ----------------------------
# Seed torch's global RNG so weight initialisation and dropout masks are
# repeatable from one "Restart & Run All" to the next.
SEED = 42
torch.manual_seed(SEED)

# Optimizer settings are defined once so the values used for training and the
# values logged to MLflow cannot drift apart.
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5
EVAL_EVERY = 20  # number of epochs between held-out loss reports

input_dim = X_train.shape[1]
model = AdvancedRegressor(input_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

# ----------------------------
# 4️⃣ Train and log with MLflow
# ----------------------------
epochs = 500

# Describe the network from the layers it actually contains (every Linear
# except the output head, and every Dropout) instead of a hand-typed string.
hidden_widths = [layer.out_features for layer in model.net if isinstance(layer, nn.Linear)][:-1]
dropout_rates = [layer.p for layer in model.net if isinstance(layer, nn.Dropout)]

with mlflow.start_run(run_name="AdvancedRegressor_PyTorch"):
    mlflow.log_params({
        "optimizer": "Adam",
        "lr": LEARNING_RATE,
        "weight_decay": WEIGHT_DECAY,
        "epochs": epochs,
        "architecture": "-".join(str(width) for width in hidden_widths),
        "dropout": "-".join(str(rate) for rate in dropout_rates),
        "seed": SEED,
    })

    for epoch in range(epochs):
        # One full-batch gradient step: the whole training tensor goes through
        # the model in a single forward/backward pass.
        model.train()
        optimizer.zero_grad()
        preds = model(X_train)
        loss = criterion(preds, y_train)
        loss.backward()
        optimizer.step()

        # Validation
        # The held-out split is only monitored here; nothing in the loop
        # (no early stopping, no checkpoint selection) reacts to it.
        if (epoch + 1) % EVAL_EVERY == 0:
            model.eval()
            with torch.no_grad():
                val_preds = model(X_test)
                val_loss = criterion(val_preds, y_test)
            # Record the curve in MLflow as well as printing it, so it can be
            # inspected after the notebook output is cleared.
            mlflow.log_metrics(
                {"train_loss": loss.item(), "val_loss": val_loss.item()},
                step=epoch + 1,
            )
            print(f"Epoch [{epoch+1}/{epochs}] | Train Loss: {loss.item():.4f} | Val Loss: {val_loss.item():.4f}")

    # ----------------------------
    # 5️⃣ Evaluation and MLflow logging
    # ----------------------------
    model.eval()  # disable dropout and use BatchNorm running statistics
    with torch.no_grad():
        y_pred = model(X_test).numpy()
    y_true = y_test.numpy()

    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    # Reuse the notebook's helper rather than re-deriving the formula inline.
    adj_r2 = adj_r2_score(y_true, y_pred)

    mlflow.log_metrics({
        "mse": mse,
        "rmse": rmse,
        "r2": r2,
        "adj_r2": adj_r2
    })

    signature = infer_signature(X_test.numpy(), y_pred)
    mlflow.pytorch.log_model(model, name="pytorch_model", signature=signature)

    print("\n✅ Model logged to MLflow successfully!")

print(f"\n✅ RMSE: {rmse:.2f}")
print(f"✅ R²: {r2:.4f}")
print(f"✅ Adjusted R²: {adj_r2:.4f}")

torch.Size([14839, 285]) torch.Size([3710, 285]) torch.Size([14839, 1]) torch.Size([3710, 1])
Epoch [20/500] | Train Loss: 227.5221 | Val Loss: 244.6290
Epoch [40/500] | Train Loss: 205.8002 | Val Loss: 206.4690
Epoch [60/500] | Train Loss: 188.2114 | Val Loss: 189.2961
Epoch [80/500] | Train Loss: 171.4303 | Val Loss: 173.0724
Epoch [100/500] | Train Loss: 154.2118 | Val Loss: 155.4861
Epoch [120/500] | Train Loss: 135.9361 | Val Loss: 135.7750
Epoch [140/500] | Train Loss: 117.9495 | Val Loss: 117.2322
Epoch [160/500] | Train Loss: 100.6853 | Val Loss: 101.1785
Epoch [180/500] | Train Loss: 84.4353 | Val Loss: 84.9480
Epoch [200/500] | Train Loss: 69.3884 | Val Loss: 70.3391
Epoch [220/500] | Train Loss: 55.6590 | Val Loss: 56.8775
Epoch [240/500] | Train Loss: 43.4566 | Val Loss: 44.8762
Epoch [260/500] | Train Loss: 32.9181 | Val Loss: 34.8787
Epoch [280/500] | Train Loss: 24.3824 | Val Loss: 26.1651
Epoch [300/500] | Train Loss: 17.5073 | Val Loss: 19.1049
Epoch [320/500] | Train 



In [None]:
# Whole-module pickle, kept for anything that already reads this path. The
# pickle stores the model's class by module path, so it can only be loaded
# where AdvancedRegressor is importable under the same module name it had here.
joblib.dump(model, "../models/pytorch_regressor.pkl")

# Also save just the learned parameters. This checkpoint is loaded with
# AdvancedRegressor(input_dim).load_state_dict(torch.load(path)) and does not
# depend on where the class was defined when it was saved.
torch.save(model.state_dict(), "../models/pytorch_regressor.pt")