In [13]:
# Import required libraries: pandas for loading the CSV, scikit-learn for the
# train/test split and feature scaling, and PyTorch for data loading,
# optimisation and the loss function.
import pandas as pd

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.nn import MSELoss

# Add the parent directory to the module search path. This must run before the
# project-local imports below. NOTE: '..' is resolved relative to the kernel's
# current working directory, so the notebook presumably has to be launched from
# its own folder -- confirm against the repository layout.
import sys
sys.path.append('..')

# Project-local building blocks: dataset wrapper, model, and per-epoch helpers.
from data.HousePriceDataset import HousePriceDataset
from model.PredictorModel import HousePricePredictor
from ModelTrainingEpoch import model_training_epoch
from ModelValidationEpoch import model_validation_epoch

In [14]:
# 1. Load and Preprocess Data
# Reads the housing CSV, splits it 75/25 into train/test frames, fills missing
# feature values, and standardises features and target with scalers fitted on
# the training split only.
TARGET_COLUMN = "median_house_value"
# Columns that are not model inputs: the target itself and "ocean_proximity",
# which this notebook drops rather than feeding to the model.
NON_FEATURE_COLUMNS = [TARGET_COLUMN, "ocean_proximity"]

housing_price_dataset = pd.read_csv("../data/housing.csv")
# Strip stray whitespace from header names so the column lookups below match.
housing_price_dataset.columns = housing_price_dataset.columns.str.strip()

# Full-dataset input/output views; the model itself only uses the split below.
housing_data_input = housing_price_dataset.drop(columns=NON_FEATURE_COLUMNS)
housing_data_output = housing_price_dataset[TARGET_COLUMN]

train_df, test_df = train_test_split(
    housing_price_dataset,
    test_size=0.25,
    random_state=42,
    shuffle=True,
)

# Missing-value handling.
# StandardScaler disregards NaN while fitting and passes NaN through unchanged
# on transform, so any missing feature value would reach the model and turn the
# loss into NaN. The recorded run of this notebook reports "Val Loss: nan";
# NOTE(review): missing feature values are one candidate cause -- if the NaN
# persists after this change, inspect model_validation_epoch as well.
# Medians come from the training split only so no test information leaks in.
# On data without missing values this step changes nothing.
train_features = train_df.drop(columns=NON_FEATURE_COLUMNS, errors='ignore')
feature_medians = train_features.median(numeric_only=True)

X_train_raw = train_features.fillna(feature_medians)
Y_train_raw = train_df[TARGET_COLUMN]

X_test_raw = test_df.drop(columns=NON_FEATURE_COLUMNS, errors='ignore').fillna(feature_medians)
Y_test_raw = test_df[TARGET_COLUMN]

# Fail early and loudly instead of training on NaN inputs.
assert not X_train_raw.isna().any().any(), "training features still contain NaN after imputation"
assert not X_test_raw.isna().any().any(), "test features still contain NaN after imputation"

# Scaling
scaler_x = StandardScaler()
scaler_y = StandardScaler()

# Fit on the training split only; the test split reuses the fitted statistics.
X_train = scaler_x.fit_transform(X_train_raw.values)
# The target is reshaped to a single column because the scaler expects 2-D input.
Y_train = scaler_y.fit_transform(Y_train_raw.values.reshape(-1, 1))

X_test = scaler_x.transform(X_test_raw.values)
Y_test = scaler_y.transform(Y_test_raw.values.reshape(-1, 1))

In [15]:
# 2. Setup DataLoaders
# A comparatively large batch keeps the GPU (cuda) busy with parallel work.
batch_size = 128

# Training split: wrapped in the project dataset and reshuffled by the loader.
train_dataset = HousePriceDataset(X_train, Y_train)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# Held-out split: same batch size, loader left at its default ordering.
val_dataset = HousePriceDataset(X_test, Y_test)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

In [None]:
# 3. Initialize Model
# Pick the GPU when CUDA is usable, otherwise fall back to the CPU.
# torch.cuda.is_available must be *called*: the bare function object is always
# truthy, which would select "cuda" even on machines without a GPU and make
# model.to(device) fail.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The network's input width is the number of feature columns after preprocessing.
model_inp = X_train.shape[1]
model = HousePricePredictor(input_dim=model_inp)
model.to(device)

# Mean-squared error loss, optimised with Adam using a small weight decay.
loss_fn = MSELoss()
optimizer = Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

In [17]:
# 4. Training Loop
# Each epoch runs one training pass and one validation pass through the
# project helpers, then prints both losses on a single line.
num_epochs = 50
print(f"Starting model training on {device}...")

for epoch in range(num_epochs):
    # One pass over the training batches.
    train_loss = model_training_epoch(model, train_loader, loss_fn, optimizer, device)

    # One pass over the held-out batches; no optimizer is handed to this helper.
    val_loss = model_validation_epoch(model, val_loader, loss_fn, device)

    epoch_summary = (
        f"Epoch [{epoch+1}/{num_epochs}] "
        f"Train Loss: {train_loss:.4f} "
        f"Val Loss: {val_loss:.4f}"
    )
    print(epoch_summary)

Starting model training on cuda...
Epoch [1/50] Train Loss: 0.4713 Val Loss: nan
Epoch [2/50] Train Loss: 0.3334 Val Loss: nan
Epoch [3/50] Train Loss: 0.3151 Val Loss: nan
Epoch [4/50] Train Loss: 0.3012 Val Loss: nan
Epoch [5/50] Train Loss: 0.2929 Val Loss: nan
Epoch [6/50] Train Loss: 0.2865 Val Loss: nan
Epoch [7/50] Train Loss: 0.2805 Val Loss: nan
Epoch [8/50] Train Loss: 0.2764 Val Loss: nan
Epoch [9/50] Train Loss: 0.2725 Val Loss: nan
Epoch [10/50] Train Loss: 0.2698 Val Loss: nan
Epoch [11/50] Train Loss: 0.2671 Val Loss: nan
Epoch [12/50] Train Loss: 0.2645 Val Loss: nan
Epoch [13/50] Train Loss: 0.2622 Val Loss: nan
Epoch [14/50] Train Loss: 0.2615 Val Loss: nan
Epoch [15/50] Train Loss: 0.2590 Val Loss: nan
Epoch [16/50] Train Loss: 0.2557 Val Loss: nan
Epoch [17/50] Train Loss: 0.2532 Val Loss: nan
Epoch [18/50] Train Loss: 0.2506 Val Loss: nan
Epoch [19/50] Train Loss: 0.2476 Val Loss: nan
Epoch [20/50] Train Loss: 0.2451 Val Loss: nan
Epoch [21/50] Train Loss: 0.2442 V

In [18]:
# 5. Export the trained model for future
# Writes everything the inference code needs into ../inference: both fitted
# scalers, the model's state_dict, and a tiny Python config holding the input
# dimension.
import joblib

# Paths where the fitted scikit-learn StandardScalers are saved
scaler_x_path = "../inference/scaler_x.joblib"
scaler_y_path = "../inference/scaler_y.joblib"

# Save the scalers
joblib.dump(scaler_x, scaler_x_path)
joblib.dump(scaler_y, scaler_y_path)
print(f"Scaler X saved to {scaler_x_path}")
print(f"Scaler Y saved to {scaler_y_path}")

# Define path
model_path = "../inference/model_parameters.pth"

# Save the state_dict (model weights and biases [parameters])
torch.save(model.state_dict(), model_path)
print(f"Model saved to {model_path}")

# Save the model input configuration
config_path = "../inference/model_config.py"
with open(config_path, "w") as config:
    config.write(f"MODEL_INPUT_DIM = {model_inp}")
# Report the config file's own path (the message previously printed model_path),
# and only once the file has been closed.
print(f"Model input dimension saved to {config_path}")


Scaler X saved to ../inference/scaler_x.joblib
Scaler Y saved to ../inference/scaler_y.joblib
Model saved to ../inference/model_parameters.pth
Model input dimension saved to ../inference/model_parameters.pth
