In [1]:
from sklearn.metrics import mean_squared_error
from torch.utils.data import DataLoader

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [2]:
def generate_classification_synthetic_data(num_samples=100_000, num_features=20, seed=42):
    """Create a seeded, linearly separable binary classification dataset.

    Features are standard-normal draws. A random linear model (weights plus a
    scalar bias) scores each row, and the label is 1.0 when the sigmoid of
    that score exceeds 0.5, otherwise 0.0.

    Note: this reseeds torch's *global* RNG via ``torch.manual_seed(seed)``.

    Args:
        num_samples: Number of rows to generate.
        num_features: Number of feature columns per row.
        seed: Seed passed to ``torch.manual_seed``.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(num_samples, num_features)``
        and ``y`` is a float tensor of 0/1 labels with shape
        ``(num_samples, 1)``.
    """
    torch.manual_seed(seed)

    # Draw order (features, weights, bias) determines the values for a seed.
    features = torch.randn(num_samples, num_features)
    coefficients = torch.randn(num_features)
    intercept = torch.randn(1)

    scores = torch.matmul(features, coefficients) + intercept
    is_positive = torch.sigmoid(scores) > 0.5

    labels = is_positive.float().unsqueeze(1)
    return features, labels


def generate_synthetic_regression_data(num_samples=100_000, num_features=20, seed=42):
    """Create a seeded synthetic regression dataset with a nonlinear target.

    The target is the sum of three parts:
      * a random linear model over all features (weights plus scalar bias),
      * a nonlinear term ``sin(x0) + log(|x1| + 1) + 0.5 * x2**2`` built from
        the first three feature columns,
      * Gaussian noise scaled by 0.1.

    Because columns 0, 1 and 2 are indexed directly, ``num_features`` must be
    at least 3. This reseeds torch's *global* RNG via
    ``torch.manual_seed(seed)``.

    Args:
        num_samples: Number of rows to generate.
        num_features: Number of feature columns per row.
        seed: Seed passed to ``torch.manual_seed``.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(num_samples, num_features)``
        and ``y`` has shape ``(num_samples, 1)``.
    """
    torch.manual_seed(seed)

    # Draw order (features, weights, bias, noise) determines the values for a seed.
    features = torch.randn(num_samples, num_features)
    coefficients = torch.randn(num_features)
    intercept = torch.randn(1)

    col0, col1, col2 = features[:, 0], features[:, 1], features[:, 2]
    linear_term = torch.matmul(features, coefficients) + intercept
    nonlinear_term = col0.sin() + (col1.abs() + 1).log() + 0.5 * col2 ** 2

    noise = 0.1 * torch.randn(num_samples)
    targets = (linear_term + nonlinear_term + noise).unsqueeze(1)

    return features, targets


In [3]:
# Generate the regression data and wrap it so samples are (features, target) pairs.
X, y = generate_synthetic_regression_data()
dataset = TensorDataset(X, y)

batch_size = 64
val_split = 0.2

# Hold out a `val_split` fraction for validation; the remainder is for training.
# random_split draws from torch's global RNG, which the generator call above seeded.
val_size = int(val_split * len(dataset))
train_size = len(dataset) - val_size
train_ds, val_ds = random_split(dataset, lengths=[train_size, val_size])

In [4]:
# Training batches are reshuffled every epoch; the held-out split keeps a
# fixed order so predictions can be aligned with their targets.
train_dataloader = DataLoader(
    dataset=train_ds,
    batch_size=batch_size,
    shuffle=True,
)
test_dataloader = DataLoader(
    dataset=val_ds,
    batch_size=batch_size,
    shuffle=False,
)

In [5]:
class HousingModel(nn.Module):
    """Small fully connected network with two ReLU hidden layers.

    Layer widths are ``num_cols -> hidden_size -> hidden_size // 2 -> 1``.
    The final layer is left linear, which suits regression; for binary
    classification the output could be wrapped in ``torch.sigmoid``.

    Args:
        num_cols: Number of input features per sample.
        hidden_size: Width of the first hidden layer; the second hidden layer
            uses ``hidden_size // 2`` units.
    """

    def __init__(self, num_cols, hidden_size):
        super().__init__()
        half_hidden = hidden_size // 2

        # Registered as a submodule but not applied in forward().
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(num_cols, hidden_size)
        self.fc2 = nn.Linear(hidden_size, half_hidden)
        self.output = nn.Linear(half_hidden, 1)

    def forward(self, x):
        """Return one raw (unactivated) output per input row."""
        for hidden_layer in (self.fc1, self.fc2):
            x = F.relu(hidden_layer(x))
        return self.output(x)


# Learning rate shared with the optimizer below.
lr = 1e-3

# Input width follows the feature matrix; the network is moved to `device`.
model = HousingModel(num_cols=X.size(1), hidden_size=20)
model = model.to(device)

# Mean squared error objective, optimized with Adam.
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

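- `optimizer.zero_grad()`: clears the gradients left over from the previous step.
- `loss.backward()`: computes the gradients of the loss with respect to the model parameters.
- `torch.nn.utils.clip_grad_norm_(...)`: clips the gradient norm to stabilize training.
- `optimizer.step()`: updates the parameters using the (clipped) gradients.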

In [6]:
def train_loop(dataloader, model, loss_fn, optimizer, log_every=1000, max_grad_norm=2.0):
    """Train ``model`` for one full pass over ``dataloader``.

    Each batch is moved to the device that holds the model's parameters, so
    the loop also works when the model lives on a GPU and the dataset on the
    CPU. The model is put into training mode first, because ``predict()``
    leaves it in eval mode.

    Args:
        dataloader: Iterable of ``(X, y)`` batches; must expose ``.dataset``
            so the total sample count can be printed.
        model: The ``nn.Module`` being trained.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        log_every: Record and print metrics every ``log_every`` batches
            (batch 0 is always logged). Must be >= 1.
        max_grad_norm: Maximum gradient norm passed to ``clip_grad_norm_``.

    Returns:
        Tuple ``(loss_history, score_history)`` holding, for every logged
        batch, the batch loss and the batch mean squared error.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError(f"log_every must be >= 1, got {log_every}")

    size = len(dataloader.dataset)
    loss_history = []
    score_history = []

    # Follow the model's own device rather than a notebook-level global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    model.train()

    for batch, (X, y) in enumerate(dataloader):
        if model_device is not None:
            X, y = X.to(model_device), y.to(model_device)

        # Forward pass: compare flat predictions against flat targets.
        pred = model(X).squeeze(1)
        loss = loss_fn(pred, y.squeeze(1))

        # Backpropagation with gradient clipping.
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()

        if batch % log_every == 0:
            # Keep `loss` bound to the tensor; use a separate name for the float.
            batch_loss = loss.item()
            current = batch * len(X)
            loss_history.append(batch_loss)

            score = mean_squared_error(
                y.detach().cpu().numpy().tolist(),
                pred.detach().cpu().numpy().tolist(),
            )
            score_history.append(score)

            print(f'loss: {batch_loss:>7f}  [{current:>5d}/{size:>5d}]')

    return loss_history, score_history


def predict(dataloader, model):
    """Run ``model`` over every batch of ``dataloader`` and collect predictions.

    Input batches are moved to the device that holds the model's parameters,
    so inference works when the model is on a GPU and the data on the CPU.
    The model is switched to eval mode and left there; gradients are disabled
    during the pass.

    Args:
        dataloader: Iterable of ``(X, target)`` batches; targets are ignored.
        model: The ``nn.Module`` used for inference. Its output is expected to
            have a size-1 second dimension, which is squeezed away.

    Returns:
        A flat list of Python floats, one per sample, in dataloader order.
    """
    final_preds = []

    # Follow the model's own device rather than a notebook-level global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    model.eval()
    with torch.no_grad():
        for X, _ in dataloader:
            if model_device is not None:
                X = X.to(model_device)
            pred = model(X).squeeze(1)
            # Under no_grad the tensor has no graph, so no detach is needed.
            final_preds.extend(pred.cpu().tolist())

    return final_preds

In [7]:
epochs = 3

# Metrics accumulated across all epochs, in the order they were logged.
loss_history, score_history = [], []

for epoch in range(epochs):
    print(f"Epoch {epoch + 1}\n-------------------------------")
    epoch_loss_history, epoch_score_history = train_loop(
        train_dataloader, model, loss_fn, optimizer
    )
    loss_history += epoch_loss_history
    score_history += epoch_score_history

Epoch 1
-------------------------------
loss: 16.605732  [    0/80000]
loss: 0.131220  [64000/80000]
Epoch 2
-------------------------------
loss: 0.184492  [    0/80000]
loss: 0.110185  [64000/80000]
Epoch 3
-------------------------------
loss: 0.194690  [    0/80000]
loss: 0.193746  [64000/80000]


In [8]:
# Gather the held-out targets batch by batch. test_dataloader is unshuffled,
# so this order matches the predictions returned by predict(). Using local
# comprehension names avoids overwriting the notebook-level `y` tensor.
y_test = torch.cat([targets for _, targets in test_dataloader]).squeeze(1).tolist()

y_pred = predict(test_dataloader, model)

# Mean squared error on the held-out split (displayed as the cell output).
mean_squared_error(y_test, y_pred)

0.12335046865072342