In [32]:
import pandas as pd
import numpy as np


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F

In [46]:
# Check if MPS is available to use GPU from mac
print(torch.__version__)
print(torch.backends.mps.is_available())
print(torch.backends.mps.is_built())

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

2.6.0
True
True
Using device: mps


# Data

In [48]:
train = pd.read_csv('train_cleaned.csv')
test = pd.read_csv('test_cleaned.csv')

train = train.astype('float32')
test = test.astype('float32')

X_train = torch.tensor(train.drop('y', axis=1).values, dtype=torch.float32)
y_train = torch.tensor(train['y'].values, dtype=torch.float32)

In [47]:
print(X_train.shape, y_train.shape)

torch.Size([8000, 54]) torch.Size([8000])


# Simple FNN with regularization layers.

In [56]:
class RealEstateNN(nn.Module):
    def __init__(self, input_size):
        super(RealEstateNN, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 54),
            nn.BatchNorm1d(54),
            nn.SiLU(),
            nn.Dropout(0.3),
            
            nn.Linear(54, 48),
            nn.BatchNorm1d(48),
            nn.SiLU(),
            nn.Dropout(0.3),

            nn.Linear(48, 24),
            nn.BatchNorm1d(24),
            nn.SiLU(),

            nn.Linear(24, 16),
            nn.BatchNorm1d(16),
            nn.SiLU(),

            nn.Linear(16, 1)
        )

    def forward(self, x):
        return self.model(x)

In [57]:
def train_fit(model, dataloader, criterion, optimizer, num_epochs=10, device=device):
    model.to(device)
    model.train()
    
    for epoch in range(num_epochs):
        total_loss = 0
        for batch in dataloader:
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
        
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss / len(dataloader):.4f}")
    print('model trained!')

In [58]:
input_size = 54
output_size = 1
learning_rate = 0.01
num_epochs = 40
batch_size = 16

dataset = TensorDataset(X_train, y_train)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [59]:
model = RealEstateNN(input_size).to(device)
criterion = nn.L1Loss()

optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [60]:
train_fit(model, dataloader, criterion, optimizer, num_epochs)

Epoch [1/40], Loss: 540169.2477
Epoch [2/40], Loss: 537738.2916
Epoch [3/40], Loss: 533190.8945
Epoch [4/40], Loss: 526804.2943
Epoch [5/40], Loss: 518830.4114
Epoch [6/40], Loss: 509440.4201
Epoch [7/40], Loss: 498706.9887
Epoch [8/40], Loss: 486678.1907
Epoch [9/40], Loss: 473520.2427
Epoch [10/40], Loss: 459240.5992
Epoch [11/40], Loss: 444129.1390
Epoch [12/40], Loss: 427967.2282
Epoch [13/40], Loss: 411246.3484
Epoch [14/40], Loss: 394579.8829
Epoch [15/40], Loss: 377951.4528
Epoch [16/40], Loss: 361731.2054
Epoch [17/40], Loss: 346551.4120
Epoch [18/40], Loss: 332360.5399
Epoch [19/40], Loss: 319367.0889
Epoch [20/40], Loss: 308156.7116
Epoch [21/40], Loss: 298139.8168
Epoch [22/40], Loss: 290245.3075
Epoch [23/40], Loss: 283597.5293
Epoch [24/40], Loss: 278566.9479
Epoch [25/40], Loss: 274980.6519
Epoch [26/40], Loss: 272346.2241
Epoch [27/40], Loss: 270603.1941
Epoch [28/40], Loss: 269625.5077
Epoch [29/40], Loss: 268780.2187
Epoch [30/40], Loss: 268454.2105
Epoch [31/40], Loss