In [32]:
import pandas as pd
import numpy as np


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F

In [46]:
# Check if MPS is available to use GPU from mac
print(torch.__version__)
print(torch.backends.mps.is_available())
print(torch.backends.mps.is_built())

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

2.6.0
True
True
Using device: mps


# Data

In [48]:
train = pd.read_csv('train_cleaned.csv')
test = pd.read_csv('test_cleaned.csv')

train = train.astype('float32')
test = test.astype('float32')

X_train = torch.tensor(train.drop('y', axis=1).values, dtype=torch.float32)
y_train = torch.tensor(train['y'].values, dtype=torch.float32)

In [47]:
print(X_train.shape, y_train.shape)

torch.Size([8000, 54]) torch.Size([8000])


# Simple FNN with regularization layers.

In [49]:
class RealEstateNN(nn.Module):
    def __init__(self, input_size):
        super(RealEstateNN, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 54),
            nn.BatchNorm1d(54),
            nn.SiLU(),
            nn.Dropout(0.3),
            
            nn.Linear(54, 48),
            nn.BatchNorm1d(48),
            nn.SiLU(),
            nn.Dropout(0.3),

            nn.Linear(48, 24),
            nn.BatchNorm1d(24),
            nn.SiLU(),

            nn.Linear(24, 12),
            nn.BatchNorm1d(12),
            nn.SiLU(),

            nn.Linear(12, 1)
        )

    def forward(self, x):
        return self.model(x)

In [50]:
def train_fit(model, dataloader, criterion, optimizer, num_epochs=10, device=device):
    model.to(device)
    model.train()
    
    for epoch in range(num_epochs):
        total_loss = 0
        for batch in dataloader:
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
        
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss / len(dataloader):.4f}")
    print('model trained!')

In [53]:
input_size = 54
output_size = 1
learning_rate = 0.01
num_epochs = 40
batch_size = 16

dataset = TensorDataset(X_train, y_train)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [54]:
model = RealEstateNN(input_size).to(device)
criterion = nn.L1Loss()

optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [55]:
train_fit(model, dataloader, criterion, optimizer, num_epochs)

  return F.l1_loss(input, target, reduction=self.reduction)


Epoch [1/40], Loss: 540267.0848
Epoch [2/40], Loss: 538447.0519
Epoch [3/40], Loss: 535009.0884
Epoch [4/40], Loss: 530196.7386
Epoch [5/40], Loss: 524190.8332
Epoch [6/40], Loss: 517120.7262
Epoch [7/40], Loss: 509046.9859
Epoch [8/40], Loss: 500033.6099
Epoch [9/40], Loss: 490204.9403
Epoch [10/40], Loss: 479508.3990
Epoch [11/40], Loss: 468013.8521
Epoch [12/40], Loss: 455715.9816
Epoch [13/40], Loss: 442632.8937
Epoch [14/40], Loss: 429010.2549
Epoch [15/40], Loss: 415049.5221
Epoch [16/40], Loss: 400491.6928
Epoch [17/40], Loss: 386373.2968
Epoch [18/40], Loss: 372207.5634
Epoch [19/40], Loss: 358933.6702
Epoch [20/40], Loss: 345528.7452
Epoch [21/40], Loss: 333004.7159
Epoch [22/40], Loss: 321595.0877
Epoch [23/40], Loss: 311551.7176
Epoch [24/40], Loss: 302486.3279
Epoch [25/40], Loss: 294758.7840
Epoch [26/40], Loss: 288107.7563
Epoch [27/40], Loss: 282712.9626
Epoch [28/40], Loss: 278171.8748
Epoch [29/40], Loss: 274920.3875
Epoch [30/40], Loss: 272714.9997
Epoch [31/40], Loss