In [1]:
import torch
import numpy as np
import random
import math

from torch.optim import SGD
from torch.optim.lr_scheduler import LinearLR
from torch.utils.data import DataLoader, TensorDataset

from models import MLP
from preprocess import preprocess_UCI_dataset

# ---- Run configuration -----------------------------------------------------
# Single seed shared by every RNG used in this notebook.
seed = 42
# Prefer the GPU when one is visible to PyTorch, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print("Device: ", device)
print("Seed: ", seed)

# Seed PyTorch, NumPy and the stdlib RNG (in that order) for reproducibility.
for _seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    _seed_fn(seed)

# Dataset name -> numeric id handed to `preprocess_UCI_dataset`
# (presumably the UCI ML Repository dataset id -- confirm against preprocess.py).
name_to_id_dict = {
    "wine_quality": 186,
    "student_performance": 320,
    "abalone": 1,
    "liver_disorders": 60,
    "concrete_compressive_strength": 165,
}

# Preprocessing options forwarded to `preprocess_UCI_dataset`.
encoding_type = "one_hot"   # encoding scheme name passed to the preprocessor
test_size = 0.2             # passed as the test split size
normalize_target = True     # passed as the target-normalization flag

Device:  cuda
Seed:  42


In [2]:
# Which entry of `name_to_id_dict` to load for this run.
dataset_name = "liver_disorders"

# Fetch and preprocess the dataset. The helper returns the train/test splits
# plus the fitted feature preprocessor and target scaler.
dataset_id = name_to_id_dict[dataset_name]
X_train, X_test, y_train, y_test, preprocessor, y_scaler = preprocess_UCI_dataset(
    dataset_id,
    encoding_type,
    normalize_target,
    test_size,
    seed,
)

# Convert every split to a float32 tensor and move it to the selected device.
X_train, y_train, X_test, y_test = (
    torch.tensor(split, dtype=torch.float32).to(device)
    for split in (X_train, y_train, X_test, y_test)
)

# Summary of the dataset: one line per split with its tensor shape.
for label, split in (
    ("X_train shape: ", X_train),
    ("y_train shape: ", y_train),
    ("X_test shape: ", X_test),
    ("y_test shape: ", y_test),
):
    print(label, split.shape)

# Aim for a batch of ~5% of the training rows (rounded up), then snap that
# target to the nearest allowed size. On a tie, `min` keeps the first (smaller)
# candidate in the list.
valid_batch_sizes = [4, 8, 16, 32, 64, 128]
target_batch_size = math.ceil(0.05 * X_train.shape[0])
batch_size = min(
    valid_batch_sizes,
    key=lambda candidate: abs(candidate - target_batch_size),
)
print("Batch size:", batch_size)

# Shuffled mini-batches over the training tensors; the final, possibly smaller,
# batch is kept (drop_last=False).
train_dataset = TensorDataset(X_train, y_train)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=False,
)
print("Number of iterations per epoch:", len(train_dataloader))

Fetched dataset name: Liver Disorders
Numeric features:  ['mcv', 'alkphos', 'sgpt', 'sgot', 'gammagt']
Categorical features:  []
X_train shape:  torch.Size([276, 5])
y_train shape:  torch.Size([276, 1])
X_test shape:  torch.Size([69, 5])
y_test shape:  torch.Size([69, 1])
Batch size: 16
Number of iterations per epoch: 18


In [3]:
# Optimisation hyperparameters.
lr = 1e-4  # candidate values: [1e-2, 1e-3, 1e-4, 1e-5]
epochs = 20

# MLP sized to the number of input features with a single output;
# [32, 32] is presumably the hidden-layer widths -- confirm against models.MLP.
n_features = X_train.shape[-1]
model = MLP(n_features, [32, 32], 1).to(device)

# SGD with momentum over all model parameters.
optimizer = SGD(model.parameters(), lr=lr, momentum=0.9)

# Linearly scale the learning rate from 1x down to 0.1x of `lr` over
# `epochs` scheduler steps.
lr_scheduler = LinearLR(
    optimizer,
    start_factor=1,
    end_factor=0.1,
    total_iters=epochs,
)

In [4]:
# Per-epoch history of the average training loss and the test-set MSE.
train_losses = []
test_losses = []

# Report progress about ten times over the run. Clamp the interval to at
# least 1: for `epochs < 10`, `epochs // 10` is 0 and the modulo below would
# raise ZeroDivisionError.
log_every = max(1, epochs // 10)

for epoch in range(epochs):

    # ---- training pass over all mini-batches ----
    model.train()
    train_loss = 0

    for X_batch, y_batch in train_dataloader:
        y_pred = model(X_batch)
        loss = torch.nn.functional.mse_loss(y_pred, y_batch, reduction='mean')
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # The learning-rate schedule advances once per epoch, not per batch.
    lr_scheduler.step()

    # Mean of the per-batch mean losses. NOTE: every batch carries equal
    # weight here, so a smaller final batch counts as much as a full one.
    avg_train_loss = train_loss / len(train_dataloader)
    train_losses.append(avg_train_loss)

    # ---- evaluation on the full test set ----
    model.eval()
    # No gradients are needed for evaluation; disabling autograd avoids
    # building (and holding on to) a computation graph every epoch.
    with torch.no_grad():
        y_pred = model(X_test)
        test_loss = torch.mean((y_pred - y_test)**2).item()
    test_losses.append(test_loss)

    if (epoch+1) % log_every == 0:
        print (f"Epoch: {epoch+1} Train Loss: {avg_train_loss} Test Loss: {test_loss}")

Epoch: 2 Train Loss: 1.0957549413045247 Test Loss: 0.8865848779678345
Epoch: 4 Train Loss: 0.9104206677940156 Test Loss: 0.8375880718231201
Epoch: 6 Train Loss: 0.8997215545839734 Test Loss: 0.8181037306785583
Epoch: 8 Train Loss: 0.8434111956093047 Test Loss: 0.8090640902519226
Epoch: 10 Train Loss: 0.8202515062358644 Test Loss: 0.8036180138587952
Epoch: 12 Train Loss: 0.9828134758604897 Test Loss: 0.8012303113937378
Epoch: 14 Train Loss: 0.8068351613150703 Test Loss: 0.801332950592041
Epoch: 16 Train Loss: 0.8010087857643763 Test Loss: 0.8039773106575012
Epoch: 18 Train Loss: 0.7946086078882217 Test Loss: 0.8058134317398071
Epoch: 20 Train Loss: 0.8035050713353686 Test Loss: 0.8070772886276245
