In [1]:
#Import required Libraries
import pandas as pd

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.nn import MSELoss

from HousePriceDataset import HousePriceDataset
from PredictorModel import HousePricePredictor
from ModelTrainingEpoch import model_training_epoch
from ModelValidationEpoch import model_validation_epoch

In [2]:
# 1. Load and Preprocess Data
housing_price_dataset = pd.read_csv("housing.csv")
housing_price_dataset.columns = housing_price_dataset.columns.str.strip()

housing_data_input = housing_price_dataset.drop(columns=["median_house_value", "ocean_proximity"])
housing_data_output = housing_price_dataset["median_house_value"]

train_df, test_df = train_test_split(
    housing_price_dataset,
    test_size=0.25,
    random_state=42,
    shuffle=True,
)

X_train_raw = train_df.drop(columns=["median_house_value", "ocean_proximity"], errors='ignore')
Y_train_raw = train_df["median_house_value"]

X_test_raw = test_df.drop(columns=["median_house_value", "ocean_proximity"], errors='ignore')
Y_test_raw = test_df["median_house_value"]

# Scaling
scaler_x = StandardScaler()
scaler_y = StandardScaler()

X_train = scaler_x.fit_transform(X_train_raw.values)
Y_train = scaler_y.fit_transform(Y_train_raw.values.reshape(-1, 1))

X_test = scaler_x.transform(X_test_raw.values)
Y_test = scaler_y.transform(Y_test_raw.values.reshape(-1, 1))

In [3]:
# 2. Setup DataLoaders
train_dataset = HousePriceDataset(X_train, Y_train)
val_dataset = HousePriceDataset(X_test, Y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

In [4]:
# 3. Initialize Model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = HousePricePredictor(input_dim=X_train.shape[1])
loss_fn = MSELoss()
optimizer = Adam(model.parameters(), lr=1e-3)

In [5]:
# 4. Training Loop
num_epochs = 50
print(f"Starting model training on {device}...")

for epoch in range(num_epochs):
    train_loss = model_training_epoch(
        model,
        train_loader,
        loss_fn,
        optimizer,
        device
    )

    val_loss = model_validation_epoch(
        model,
        val_loader,
        loss_fn,
        device
    )

    print(
        f"Epoch [{epoch+1}/{num_epochs}] "
        f"Train Loss: {train_loss:.4f} "
        f"Val Loss: {val_loss:.4f}"
    )

Starting model training on cpu...
Epoch [1/50] Train Loss: 0.3508 Val Loss: nan
Epoch [2/50] Train Loss: 0.2920 Val Loss: nan
Epoch [3/50] Train Loss: 0.2788 Val Loss: nan
Epoch [4/50] Train Loss: 0.2712 Val Loss: nan
Epoch [5/50] Train Loss: 0.2649 Val Loss: nan
Epoch [6/50] Train Loss: 0.2593 Val Loss: nan
Epoch [7/50] Train Loss: 0.2547 Val Loss: nan
Epoch [8/50] Train Loss: 0.2466 Val Loss: nan
Epoch [9/50] Train Loss: 0.2406 Val Loss: nan
Epoch [10/50] Train Loss: 0.2370 Val Loss: nan
Epoch [11/50] Train Loss: 0.2334 Val Loss: nan
Epoch [12/50] Train Loss: 0.2283 Val Loss: nan
Epoch [13/50] Train Loss: 0.2256 Val Loss: nan
Epoch [14/50] Train Loss: 0.2240 Val Loss: nan
Epoch [15/50] Train Loss: 0.2215 Val Loss: nan
Epoch [16/50] Train Loss: 0.2194 Val Loss: nan
Epoch [17/50] Train Loss: 0.2194 Val Loss: nan
Epoch [18/50] Train Loss: 0.2168 Val Loss: nan
Epoch [19/50] Train Loss: 0.2151 Val Loss: nan
Epoch [20/50] Train Loss: 0.2133 Val Loss: nan
Epoch [21/50] Train Loss: 0.2125 Va