In [16]:
# Matplotlib
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
# Numpy
import numpy as np
# Pandas
import pandas as pd
# Torch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchmetrics.classification import BinaryAccuracy

In [17]:
# Define device for torch
device = torch.device("cpu")
# MPS for Apple Silicon GPUs
if torch.mps.is_available():
   print("MPS is available")
   device = torch.device("mps")

# CUDA for Nvidia GPUs
if torch.cuda.is_available():
   print("CUDA is available")
   device = torch.device("cuda")
print(device)

MPS is available
mps


In [18]:
class HDBDataset(Dataset):
    def __init__(self, csv_path):
        self.dataframe = pd.read_csv(csv_path)
        
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        # Extract target from index 3
        target = self.dataframe.iloc[idx, 3]  
        
        # Extract all other columns *except* the target column (drop index 3)
        features = self.dataframe.drop(columns=self.dataframe.columns[3]).iloc[idx].values
        
        # Convert to PyTorch tensors
        features = torch.tensor(features, dtype=torch.float32)
        target = torch.tensor(target, dtype=torch.float32)
        
        return features, target

In [27]:
batch_size = 512

train_dataset = HDBDataset("data/train_data.csv")
test_dataset = HDBDataset("data/test_data.csv")
valid_dataset = HDBDataset("data/valid_data.csv")

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle= True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle= False, num_workers=4)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle= False, num_workers=4)

In [28]:
class HDBPricePredictor(nn.Module):
    def __init__(self):
        super().__init__()
        self.input = nn.Linear(52, 128)
        self.hidden1 = nn.Linear(128, 64)
        self.hidden2 = nn.Linear(64, 32)
        self.fc = nn.Linear(32, 1)
        self.activation = nn.ReLU()
        self.loss = nn.MSELoss()

    def forward(self, x):
        x = self.input(x)
        x = self.activation(self.hidden1(x))
        x = self.activation(self.hidden2(x))
        x = self.fc(x)
        return x

In [29]:
# Create Neural Network model
model = HDBPricePredictor().to(device)

# Gradient descent parameters: optimizers, repetitions, etc.
num_epochs = 50
optimizer = torch.optim.Adam(model.parameters(), 
                           lr=0.01,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.000)
optimizer.zero_grad()

train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []

for epoch in range(num_epochs):
    # Training loop
    model.train()
    # Create lists for the epoch's train accuracies
    batch_train_losses = []
    batch_no = 0
    for batch in train_loader:
        batch_no +=1
        print("training batch", {batch_no})
        # Unpack the mini-batch data
        inputs_batch, outputs_batch = batch
        outputs_re = outputs_batch.to(device).reshape(-1, 1)
        inputs_re = inputs_batch.to(device)
        
        # Forward pass
        pred = model(inputs_re)
        loss_value = model.loss(pred.float(), outputs_re.float())
    
        # Backward pass and optimization
        loss_value.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        batch_train_losses.append(loss_value.item())
    train_losses.append(sum(batch_train_losses)/len(batch_train_losses))
    # Validation loop
    model.eval()
    
    print("Training done! Validation commencing...")
    valid_no = 0
    with torch.no_grad():
        for batch in valid_loader:
            valid_no += 1
            print("validating batch", {valid_no})
            inputs_batch, outputs_batch = batch
            outputs_re = outputs_batch.to(device).reshape(-1, 1)
            inputs_re = inputs_batch.to(device)
            
            pred = model(inputs_re)
            val_loss = model.loss(pred.float(), outputs_re.float())
            
            val_losses.append(val_loss.item())
    
    print(f'Epoch [{epoch+1}/{num_epochs}]', f'Train Loss: {loss_value.item():.4f}',f'Val Loss: {val_loss.item():.4f}')
    
    # Save the model weights for each epoch
    torch.save(model.state_dict(), f'nn_baseline_weights/model_weights_epoch_{epoch+1}.pth')

Traceback (most recent call last):
  File [35m"<string>"[0m, line [35m1[0m, in [35m<module>[0m
    from multiprocessing.spawn import spawn_main; [31mspawn_main[0m[1;31m(tracker_fd=83, pipe_handle=97)[0m
                                                  [31m~~~~~~~~~~[0m[1;31m^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^[0m
  File [35m"/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/spawn.py"[0m, line [35m122[0m, in [35mspawn_main[0m
    exitcode = _main(fd, parent_sentinel)
  File [35m"/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/spawn.py"[0m, line [35m132[0m, in [35m_main[0m
    self = reduction.pickle.load(from_parent)
[1;35mAttributeError[0m: [35mCan't get attribute 'HDBDataset' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>[0m


KeyboardInterrupt: 