In [1]:
import warnings
warnings.filterwarnings('ignore')

import os
import gc
import wandb
import random
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.utils.data import TensorDataset, DataLoader 
from torch.cuda.amp import autocast, GradScaler

from multiprocessing import cpu_count
from sklearn.model_selection import train_test_split

from model.mlp import MLP


def seed_everything(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    cudnn.benchmark = False
    cudnn.deterministic = True
    random.seed(seed)

#### Set Params

In [2]:
SEED = 333
seed_everything(SEED)

BATCH_SIZE = 32
LEARNING_RATE = 1e-4
EPOCHS = 10000
PATIENCE = 20

SAVE_PATH = '.'
RUN_NAME = 'MLP-regression'
WANDB_PRJ = 'public'

WANDB_CONFIG = {
    'seed': SEED,
    'model': RUN_NAME,
    'batch_size': BATCH_SIZE,
    'learning_rate': LEARNING_RATE,  
}

if not os.path.exists(SAVE_PATH):
        os.makedirs(SAVE_PATH)

#### Load Dataset and Make Loader

In [3]:
df = pd.read_csv('./sample_datasets/house-prices/preprocessed.csv')
df.head(2).T

Unnamed: 0,0,1
MSSubClass,0.073350,-0.872264
LotFrontage,-0.207948,0.409724
LotArea,-0.207071,-0.091855
OverallQual,0.651256,-0.071812
OverallCond,-0.517023,2.178881
...,...,...
SaleCondition_Family,0.000000,0.000000
SaleCondition_Normal,1.000000,1.000000
SaleCondition_Partial,0.000000,0.000000
SaleCondition_nan,0.000000,0.000000


In [4]:
X, y = df.drop('SalePrice', axis=1).values, df['SalePrice'].values

trn_data, val_data, trn_label, val_label = train_test_split(X, y, test_size=0.2, random_state=SEED)

trn_ds, val_ds = list(map(lambda x, y: TensorDataset(torch.tensor(x, dtype=torch.float), torch.tensor(y, dtype=torch.float)), 
                          [trn_data, val_data], [trn_label, val_label]))
trn_dl, val_dl = list(map(lambda x, y: DataLoader(x, batch_size=BATCH_SIZE, num_workers=cpu_count(), shuffle=y, drop_last=True), 
                          [trn_ds, val_ds], [True, False]))

datasets = {'train' : trn_ds, 'valid': val_ds}
dataloaders = {'train': trn_dl, 'valid': val_dl}

dataset_sizes = {x: len(datasets[x]) for x in ['train', 'valid']}

#### Prepare Model

In [5]:
model = MLP(in_features=331, hidden_dim=128, out_features=1, num_layers=4, dropout=0.1)
print(model)

optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50, eta_min=0, last_epoch=-1, verbose=False)
criterion = nn.MSELoss()
scaler = GradScaler()

MLP(
  (activation): GELU(approximate='none')
  (layers): ModuleList(
    (0): Linear(in_features=331, out_features=128, bias=True)
    (1): GELU(approximate='none')
    (2): Linear(in_features=128, out_features=128, bias=True)
    (3): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (4): GELU(approximate='none')
    (5): Dropout(p=0.1, inplace=False)
    (6): Linear(in_features=128, out_features=128, bias=True)
    (7): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (8): GELU(approximate='none')
    (9): Dropout(p=0.1, inplace=False)
    (10): Linear(in_features=128, out_features=128, bias=True)
    (11): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (12): GELU(approximate='none')
    (13): Dropout(p=0.1, inplace=False)
    (14): Linear(in_features=128, out_features=1, bias=True)
  )
)


#### Training

In [6]:
wandb.init(name=RUN_NAME, project=WANDB_PRJ, config=WANDB_CONFIG, reinit=True) # connect

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'model running on {device}')

model = model.to(device)

min_loss = np.Inf
trials = 0

for e in range(EPOCHS):
    
    gc.collect()
    torch.cuda.empty_cache()
    running_log = {'epoch': e+1} # logging
    
    for phase in ['train', 'valid']: 
        running_loss = 0.0
        
        model.train() if phase == 'train' else model.eval()

        for idx, (features, labels) in enumerate(dataloaders[phase]):
            features = features.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
        
            with torch.set_grad_enabled(phase=='train'):
                # forward
                with autocast():
                    logits = model(features)
                    loss = criterion(logits.view(-1), labels)  
                    
                # backward
                if phase == 'train' and 'cuda' in device:
                    scaler.scale(loss).backward()
                    scaler.step(optimizer)
                    scaler.update()
                elif phase == 'train':
                    loss.backward()
                    optimizer.step()
                    
                running_loss += loss.item() * features.size(0)
                             
        if phase == 'train' and e >= 10:
            scheduler.step()
    
        epoch_loss = running_loss / dataset_sizes[phase]
        running_log.update({f'{phase}_loss': epoch_loss}) # logging
        
        if (e+1) % 10 == 0: 
            print(f'epoch {e+1} {phase} | Loss: {epoch_loss:.4f}')
        
        # save best model
        if phase == 'valid':
            if epoch_loss < min_loss:
                torch.save(model.state_dict(), f'{SAVE_PATH}/{RUN_NAME}-best.pt')
                min_loss = epoch_loss
                trials = 0
            else:
                trials += 1  
                
    wandb.log(running_log) # logging
    
    # early stopping
    if trials >= PATIENCE:
        print(f'Early Stopping at epoch {e+1} with valid loss {min_loss:.4f}')
        break

wandb.finish()

[34m[1mwandb[0m: Currently logged in as: [33m33h002[0m. Use [1m`wandb login --relogin`[0m to force relogin


model running on cuda
epoch 10 train | Loss: 30.7220
epoch 10 valid | Loss: 27.3417
epoch 20 train | Loss: 9.8301
epoch 20 valid | Loss: 8.2137
epoch 30 train | Loss: 2.9762
epoch 30 valid | Loss: 2.1576
epoch 40 train | Loss: 1.2122
epoch 40 valid | Loss: 0.7487
epoch 50 train | Loss: 0.9577
epoch 50 valid | Loss: 0.4516
epoch 60 train | Loss: 0.8723
epoch 60 valid | Loss: 0.4157
epoch 70 train | Loss: 0.8357
epoch 70 valid | Loss: 0.3806
epoch 80 train | Loss: 0.6436
epoch 80 valid | Loss: 0.2233
epoch 90 train | Loss: 0.5134
epoch 90 valid | Loss: 0.1514
epoch 100 train | Loss: 0.5166
epoch 100 valid | Loss: 0.1427
epoch 110 train | Loss: 0.3871
epoch 110 valid | Loss: 0.0451
epoch 120 train | Loss: 0.4062
epoch 120 valid | Loss: 0.0296
epoch 130 train | Loss: 0.3918
epoch 130 valid | Loss: 0.0312
epoch 140 train | Loss: 0.3866
epoch 140 valid | Loss: 0.0273
epoch 150 train | Loss: 0.3660
epoch 150 valid | Loss: 0.0272
epoch 160 train | Loss: 0.3815
epoch 160 valid | Loss: 0.0256
Ea

VBox(children=(Label(value='0.002 MB of 0.015 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.133058…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid_loss,█▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,167.0
train_loss,0.36555
valid_loss,0.02654
