In [1]:
import warnings
warnings.filterwarnings("ignore")

import math
import torch
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.nn as nn
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, ExponentialLR, CosineAnnealingLR, StepLR, OneCycleLR

from dataloader import Dataset_AMEX
from metric import AmexMetric
from model_pe import Transformer

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint

In [2]:
# dataset = Dataset_AMEX('val')
# y_true = torch.tensor(dataset[0][1], dtype=torch.float)[None]

# model = Transformer(num_tokens=1,
#         feat_dim=188,
#         embed_dim = 64,
#         num_heads=4,
#         num_encoder_layers=2,
#         dropout_p=0.3)
# y_hats = model(torch.tensor(dataset[0][0])[None])
# loss_fn = nn.BCELoss(reduction="mean")
# loss_fn(y_hats.squeeze(1), y_true)
# #val_amex_metric = AmexMetric()
# #val_amex_metric.update(y_hats.reshape(-1), y_true)

# Data Module

In [3]:
class Dataset_pl(pl.LightningDataModule):
    def __init__(self, fold):
        super().__init__()
        self.fold = 1
        
    def prepare_data(self):
        pass

    def setup(self, stage= None):
        # Assign train/val datasets for use in dataloaders
        if stage == "fit" or stage is None:
            self.train_set = Dataset_AMEX('train', fold=self.fold)
            self.val_set = Dataset_AMEX('val', fold=self.fold)
        if stage == "validate":
            self.val_set = Dataset_AMEX('val', fold=self.fold)
        # Assign test dataset for use in dataloader(s)
        if stage == "test" or stage is None:
            self.val_set = Dataset_AMEX('val', fold=self.fold)
        if stage == "predict" or stage is None:
            self.test_set = Dataset_AMEX('test')

    def train_dataloader(self):
        return DataLoader(self.train_set, batch_size=512, shuffle=True, num_workers=1)

    def val_dataloader(self):
        return DataLoader(self.val_set, batch_size=2048, shuffle=False, num_workers=1)

    def test_dataloader(self):
        return DataLoader(self.test_set, batch_size=4096, shuffle=False, num_workers=1)

    def predict_dataloader(self):
        return DataLoader(self.test_set, batch_size=4096, shuffle=False, num_workers=1)

# Model

In [4]:
class Model_transformer(pl.LightningModule):
    def __init__(self, learning_rate=1e-3):#, batch_size):
        super().__init__()
        self.model = Transformer(num_tokens=1,
                        feat_dim=188,
                        embed_dim = 64,
                        num_heads=4,
                        num_encoder_layers=2,
                        dropout_p=0.3)
        self.learning_rate = learning_rate
        self.train_amex_metric = AmexMetric()
        self.val_amex_metric = AmexMetric()
        self.loss_fn = nn.BCELoss(reduction="mean")
        
    def forward(self, x):
        # in lightning, forward defines the prediction/inference actions
        y_hat = self.model(x)
        return y_hat

    def training_step(self, batch, batch_idx):
        # training_step defines the train loop. It is independent of forward
        x, y = batch
        x, y = x.float(), y.float()
        y_hat = self.model(x)
        # loss function
        loss = self.loss_fn(y_hat.squeeze(1), y)
        self.train_amex_metric.update(y_hat.squeeze(1), y)
        self.log_dict({'train_loss': loss, 'train_amex_metric': self.train_amex_metric}, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {'loss': loss}
    
    def validation_step(self, batch, batch_idx):
        # training_step defines the train loop. It is independent of forward
        x, y = batch
        x, y = x.float(), y.float()
        y_hat = self.model(x)
        # loss function
        loss = self.loss_fn(y_hat.squeeze(1), y)
        self.val_amex_metric.update(y_hat.squeeze(1), y)
        self.log_dict({'val_loss': loss, 'val_amex_metric': self.val_amex_metric}, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {'loss': loss}       

    def test_step(self, batch, batch_idx):
        # training_step defines the train loop. It is independent of forward
        x, y = batch
        x, y = x.float(), y.float()
        y_hat = self.model(x)
        # loss function
        #loss = self.loss_fn(y_hats.squeeze(1), y_true)

    def predict_step(self, batch, batch_idx):
        # training_step defines the train loop. It is independent of forward
        x, y = batch
        x, y = x.float(), y.float()
        with torch.no_grad():
            y_hat = self.model(x)#.squeeze(1)
        return y_hat
    
    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.learning_rate)
        lr_scheduler = OneCycleLR(optimizer, max_lr=1e-3, epochs=25, steps_per_epoch=718) #steps_per_epoch=len(dataloader)
        return [optimizer], [lr_scheduler]

# Find LR

In [5]:
# dm = Dataset_pl(1)
# model = Model_transformer()#, argv['batch_size']) 
# trainer = pl.Trainer(gpus=2, strategy='dp')
# lr_finder = trainer.tuner.lr_find(model, dm)

# # Results can be found in
# lr_finder.results

# # Plot with
# fig = lr_finder.plot(suggest=True)
# fig.show()

# # Pick point based on plot, or get suggestion
# new_lr = lr_finder.suggestion()

# Training

In [6]:
# dm = Dataset_pl(1)
# model = Model_transformer()#, argv['batch_size']) 

# wandb_logger = WandbLogger(project="AMEX")
# callbacks=[ModelCheckpoint(dirpath='ckpt', 
#                            monitor="val_amex_metric", mode="max")]

# trainer = pl.Trainer(gpus=[1], max_epochs=30, 
#                     logger=wandb_logger, callbacks=callbacks,
#                     enable_progress_bar=False)

# trainer.fit(model, datamodule=dm)

# # get validation metrics
# val = trainer.validate(model, datamodule=dm, ckpt_path='best')
# val_amex_metric_epoch = val[0]['val_amex_metric_epoch']

# # get output
# output = trainer.predict(model, datamodule=dm, ckpt_path='best')
# output = torch.vstack(output)

In [None]:
val_metrics = []
outputs = []
for i in range(1,11):
    dm = Dataset_pl(i)
    model = Model_transformer()#, argv['batch_size']) 

    #wandb_logger = WandbLogger(project="AMEX")
    callbacks=[ModelCheckpoint(dirpath='ckpt', 
                               monitor="val_amex_metric", mode="max")]

#     trainer = pl.Trainer(gpus=[1], max_epochs=25, 
#                         logger=wandb_logger, callbacks=callbacks,
#                         enable_progress_bar=False)
    trainer = pl.Trainer(gpus=[1], max_epochs=25, 
                         callbacks=callbacks,
                         enable_progress_bar=False)
    trainer.fit(model, datamodule=dm)

    # get validation metrics
    val = trainer.validate(model, datamodule=dm, ckpt_path='best')
    val_amex_metric_epoch = val[0]['val_amex_metric_epoch']
    
    # get output
    output = trainer.predict(model, datamodule=dm, ckpt_path='best')
    output = torch.vstack(output)
    # save result
    val_metrics.append(val_amex_metric_epoch)
    outputs.append(output)
    
    print(f"fold {i}", val_amex_metric_epoch)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


### Training data shapes (367131, 13, 188) (367131,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name              | Type        | Params
--------------------------------------------------
0 | model             | Transformer | 509 K 
1 | train_amex_metric | AmexMetric  | 0     
2 | val_amex_metric   | AmexMetric  | 0     
3 | loss_fn           | BCELoss     | 0     
--------------------------------------------------
509 K     Trainable params
0         Non-trainable params
509 K     Total params
2.037     Total estimated model params size (MB)


### Validation data shapes (91782, 13, 188) (91782,)


Restoring states from the checkpoint path at /RP1/mydocker/Ben/amex/ckpt/epoch=24-step=17950.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Loaded model weights from checkpoint at /RP1/mydocker/Ben/amex/ckpt/epoch=24-step=17950.ckpt


### Validation data shapes (91782, 13, 188) (91782,)
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
  val_amex_metric_epoch     0.7779491559726072
     val_loss_epoch         0.22833910584449768
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100% 20/20 [00:02<00:00,  7.55it/s]
Restoring states from the checkpoint path at /RP1/mydocker/Ben/amex/ckpt/epoch=24-step=17950.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
Loaded model weights from checkpoint at /RP1/mydocker/Ben/amex/ckpt/epoch=24-step=17950.ckpt


### Test data shapes (924621, 13, 188) (210,)


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


fold 1 0.7779491559726072
