In [1]:
import warnings
warnings.filterwarnings("ignore")

import math
import torch
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.nn as nn
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, ExponentialLR, CosineAnnealingLR, StepLR, OneCycleLR

from dataloader import Dataset_AMEX
from metric import AmexMetric
from model_kaggle_ben import Transformer

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint

In [2]:
# dataset = Dataset_AMEX('val')
# y_true = torch.tensor(dataset[0][1], dtype=torch.float)[None]

# model = Transformer(num_tokens=1,
#         feat_dim=188,
#         embed_dim = 64,
#         num_heads=4,
#         num_encoder_layers=2,
#         dropout_p=0.3)
# y_hats = model(torch.tensor(dataset[0][0])[None])
# loss_fn = nn.BCELoss(reduction="mean")
# loss_fn(y_hats.squeeze(1), y_true)
# #val_amex_metric = AmexMetric()
# #val_amex_metric.update(y_hats.reshape(-1), y_true)

# Data Module

In [3]:
class Dataset_pl(pl.LightningDataModule):
    def __init__(self, fold):
        super().__init__()
        self.fold = 1
        
    def prepare_data(self):
        pass

    def setup(self, stage= None):
        # Assign train/val datasets for use in dataloaders
        if stage == "fit" or stage is None:
            self.train_set = Dataset_AMEX('train', fold=self.fold)
            self.val_set = Dataset_AMEX('val', fold=self.fold)
        if stage == "validate":
            self.val_set = Dataset_AMEX('val', fold=self.fold)
        # Assign test dataset for use in dataloader(s)
        if stage == "test" or stage is None:
            self.val_set = Dataset_AMEX('val', fold=self.fold)
        if stage == "predict" or stage is None:
            self.test_set = Dataset_AMEX('test')

    def train_dataloader(self):
        return DataLoader(self.train_set, batch_size=512, shuffle=True, num_workers=4)

    def val_dataloader(self):
        return DataLoader(self.val_set, batch_size=2048, shuffle=False, num_workers=4)

    def test_dataloader(self):
        return DataLoader(self.test_set, batch_size=4096, shuffle=False, num_workers=4)

    def predict_dataloader(self):
        return DataLoader(self.test_set, batch_size=4096, shuffle=False, num_workers=4)

# Model

In [4]:
class Model_transformer(pl.LightningModule):
    def __init__(self, learning_rate=1e-3):#, batch_size):
        super().__init__()
        self.model = Transformer(num_tokens=1,
                        feat_dim=188,
                        embed_dim = 64,
                        num_heads=4,
                        num_encoder_layers=2,
                        dropout_p=0.3)
        self.learning_rate = learning_rate
        self.train_amex_metric = AmexMetric()
        self.val_amex_metric = AmexMetric()
        self.loss_fn = nn.BCELoss(reduction="mean")
        
    def forward(self, x):
        # in lightning, forward defines the prediction/inference actions
        y_hat = self.model(x)
        return y_hat

    def training_step(self, batch, batch_idx):
        # training_step defines the train loop. It is independent of forward
        x, y = batch
        x, y = x.float(), y.float()
        y_hat = self.model(x)
        # loss function
        loss = self.loss_fn(y_hat.squeeze(1), y)
        self.train_amex_metric.update(y_hat.squeeze(1), y)
        self.log_dict({'train_loss': loss, 'train_amex_metric': self.train_amex_metric}, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {'loss': loss}
    
    def validation_step(self, batch, batch_idx):
        # training_step defines the train loop. It is independent of forward
        x, y = batch
        x, y = x.float(), y.float()
        y_hat = self.model(x)
        # loss function
        loss = self.loss_fn(y_hat.squeeze(1), y)
        self.val_amex_metric.update(y_hat.squeeze(1), y)
        self.log_dict({'val_loss': loss, 'val_amex_metric': self.val_amex_metric}, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {'loss': loss}       

    def test_step(self, batch, batch_idx):
        # training_step defines the train loop. It is independent of forward
        x, y = batch
        x, y = x.float(), y.float()
        y_hat = self.model(x)
        # loss function
        #loss = self.loss_fn(y_hats.squeeze(1), y_true)

    def predict_step(self, batch, batch_idx):
        # training_step defines the train loop. It is independent of forward
        x, y = batch
        x, y = x.float(), y.float()
        with torch.no_grad():
            y_hat = self.model(x)#.squeeze(1)
        return y_hat
    
    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.learning_rate)
        lr_scheduler = OneCycleLR(optimizer, max_lr=1e-3, epochs=25, steps_per_epoch=718) #steps_per_epoch=len(dataloader)
        return [optimizer], [lr_scheduler]

# Find LR

In [5]:
# dm = Dataset_pl(1)
# model = Model_transformer()#, argv['batch_size']) 
# trainer = pl.Trainer(gpus=2, strategy='dp')
# lr_finder = trainer.tuner.lr_find(model, dm)

# # Results can be found in
# lr_finder.results

# # Plot with
# fig = lr_finder.plot(suggest=True)
# fig.show()

# # Pick point based on plot, or get suggestion
# new_lr = lr_finder.suggestion()

# Training

In [6]:
# dm = Dataset_pl(1)
# model = Model_transformer()#, argv['batch_size']) 

# wandb_logger = WandbLogger(project="AMEX")
# callbacks=[ModelCheckpoint(dirpath='ckpt', 
#                            monitor="val_amex_metric", mode="max")]

# trainer = pl.Trainer(gpus=1, max_epochs=30, 
#                     logger=wandb_logger, callbacks=callbacks,
#                     enable_progress_bar=True)

# trainer.fit(model, datamodule=dm)

# # get validation metrics
# val = trainer.validate(model, datamodule=dm, ckpt_path='best')
# val_amex_metric_epoch = val[0]['val_amex_metric_epoch']

# # get output
# output = trainer.predict(model, datamodule=dm, ckpt_path='best')
# output = torch.vstack(output)

In [7]:
val_metrics = []

for i in range(1,11):
    dm = Dataset_pl(i)
    model = Model_transformer()#, argv['batch_size']) 

    #wandb_logger = WandbLogger(project="AMEX")
    callbacks=[ModelCheckpoint(dirpath='ckpt', 
                               monitor="val_amex_metric", mode="max")]

#     trainer = pl.Trainer(gpus=[1], max_epochs=25, 
#                         logger=wandb_logger, callbacks=callbacks,
#                         enable_progress_bar=True)
    trainer = pl.Trainer(gpus=1, max_epochs=25, 
                         callbacks=callbacks,
                         enable_progress_bar=True)
    trainer.fit(model, datamodule=dm)

    # get validation metrics
    val = trainer.validate(model, datamodule=dm, ckpt_path='best')
    val_amex_metric_epoch = val[0]['val_amex_metric_epoch']
    
    # get output
    output = trainer.predict(model, datamodule=dm, ckpt_path='best')
    output = torch.vstack(output)
    # save result
    val_metrics.append(val_amex_metric_epoch)

    df = pd.DataFrame(output)
    df2 = pd.read_csv('submission.csv')
    df2['prediction']=df.values
    df2.to_csv(f'results/onecycle_prediction_fold{i}.csv', index=False)
    del dm, trainer, model, output, df, df2
    import gc
    gc.collect()

    print(f"fold {i}", val_amex_metric_epoch)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


### Training data shapes (367131, 13, 188) (367131,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type        | Params
--------------------------------------------------
0 | model             | Transformer | 509 K 
1 | train_amex_metric | AmexMetric  | 0     
2 | val_amex_metric   | AmexMetric  | 0     
3 | loss_fn           | BCELoss     | 0     
--------------------------------------------------
509 K     Trainable params
0         Non-trainable params
509 K     Total params
2.037     Total estimated model params size (MB)


### Validation data shapes (91782, 13, 188) (91782,)
Epoch 24: 100%|██████████| 763/763 [00:37<00:00, 20.31it/s, loss=0.202, v_num=15, train_loss_step=0.122, val_loss_step=0.236, val_loss_epoch=0.228, val_amex_metric_epoch=0.778, train_loss_epoch=0.213, train_amex_metric_epoch=0.800]


Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=20-step=15078.ckpt


### Validation data shapes (91782, 13, 188) (91782,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=20-step=15078.ckpt


Validation DataLoader 0: 100%|██████████| 45/45 [00:00<00:00, 57.75it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
  val_amex_metric_epoch      0.780931261396073
     val_loss_epoch         0.22618217766284943
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 20/20 [00:23<00:00,  1.19s/it]
Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=20-step=15078.ckpt


### Test data shapes (924621, 13, 188) (210,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=20-step=15078.ckpt


Predicting DataLoader 0: 100%|██████████| 226/226 [00:10<00:00, -46.70it/s]  
fold 1 0.780931261396073


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


### Training data shapes (367131, 13, 188) (367131,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type        | Params
--------------------------------------------------
0 | model             | Transformer | 509 K 
1 | train_amex_metric | AmexMetric  | 0     
2 | val_amex_metric   | AmexMetric  | 0     
3 | loss_fn           | BCELoss     | 0     
--------------------------------------------------
509 K     Trainable params
0         Non-trainable params
509 K     Total params
2.037     Total estimated model params size (MB)


### Validation data shapes (91782, 13, 188) (91782,)
Epoch 24: 100%|██████████| 763/763 [00:37<00:00, 20.27it/s, loss=0.207, v_num=17, train_loss_step=0.194, val_loss_step=0.233, val_loss_epoch=0.227, val_amex_metric_epoch=0.776, train_loss_epoch=0.214, train_amex_metric_epoch=0.798] 


Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=15-step=11488.ckpt


### Validation data shapes (91782, 13, 188) (91782,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=15-step=11488.ckpt


Validation DataLoader 0: 100%|██████████| 45/45 [00:00<00:00, 51.10it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
  val_amex_metric_epoch     0.7793483844302438
     val_loss_epoch         0.22654196619987488
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 20/20 [00:21<00:00,  1.08s/it]
Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=15-step=11488.ckpt


### Test data shapes (924621, 13, 188) (210,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=15-step=11488.ckpt


Predicting DataLoader 0: 100%|██████████| 226/226 [00:07<00:00, -62.37it/s]  


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


fold 2 0.7793483844302438
### Training data shapes (367131, 13, 188) (367131,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type        | Params
--------------------------------------------------
0 | model             | Transformer | 509 K 
1 | train_amex_metric | AmexMetric  | 0     
2 | val_amex_metric   | AmexMetric  | 0     
3 | loss_fn           | BCELoss     | 0     
--------------------------------------------------
509 K     Trainable params
0         Non-trainable params
509 K     Total params
2.037     Total estimated model params size (MB)


### Validation data shapes (91782, 13, 188) (91782,)
Epoch 24: 100%|██████████| 763/763 [00:31<00:00, 24.46it/s, loss=0.203, v_num=18, train_loss_step=0.110, val_loss_step=0.232, val_loss_epoch=0.227, val_amex_metric_epoch=0.777, train_loss_epoch=0.214, train_amex_metric_epoch=0.797]


Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=15-step=11488-v1.ckpt


### Validation data shapes (91782, 13, 188) (91782,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=15-step=11488-v1.ckpt


Validation DataLoader 0: 100%|██████████| 45/45 [00:00<00:00, 61.96it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
  val_amex_metric_epoch     0.7788639593931597
     val_loss_epoch         0.22663640975952148
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 20/20 [00:01<00:00, 12.34it/s]
Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=15-step=11488-v1.ckpt


### Test data shapes (924621, 13, 188) (210,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=15-step=11488-v1.ckpt


Predicting DataLoader 0: 100%|██████████| 226/226 [00:08<00:00, -57.22it/s]  
fold 3 0.7788639593931597


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


### Training data shapes (367131, 13, 188) (367131,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type        | Params
--------------------------------------------------
0 | model             | Transformer | 509 K 
1 | train_amex_metric | AmexMetric  | 0     
2 | val_amex_metric   | AmexMetric  | 0     
3 | loss_fn           | BCELoss     | 0     
--------------------------------------------------
509 K     Trainable params
0         Non-trainable params
509 K     Total params
2.037     Total estimated model params size (MB)


### Validation data shapes (91782, 13, 188) (91782,)
Epoch 24: 100%|██████████| 763/763 [00:31<00:00, 24.30it/s, loss=0.22, v_num=19, train_loss_step=0.258, val_loss_step=0.234, val_loss_epoch=0.226, val_amex_metric_epoch=0.780, train_loss_epoch=0.214, train_amex_metric_epoch=0.798] 


Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=17-step=12924-v3.ckpt


### Validation data shapes (91782, 13, 188) (91782,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=17-step=12924-v3.ckpt


Validation DataLoader 0: 100%|██████████| 45/45 [00:00<00:00, 63.32it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
  val_amex_metric_epoch       0.7810501423454
     val_loss_epoch         0.2258906066417694
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 20/20 [00:01<00:00, 12.57it/s]
Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=17-step=12924-v3.ckpt


### Test data shapes (924621, 13, 188) (210,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=17-step=12924-v3.ckpt


Predicting DataLoader 0: 100%|██████████| 226/226 [00:08<00:00, -60.65it/s]  
fold 4 0.7810501423454


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


### Training data shapes (367131, 13, 188) (367131,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type        | Params
--------------------------------------------------
0 | model             | Transformer | 509 K 
1 | train_amex_metric | AmexMetric  | 0     
2 | val_amex_metric   | AmexMetric  | 0     
3 | loss_fn           | BCELoss     | 0     
--------------------------------------------------
509 K     Trainable params
0         Non-trainable params
509 K     Total params
2.037     Total estimated model params size (MB)


### Validation data shapes (91782, 13, 188) (91782,)
Epoch 24: 100%|██████████| 763/763 [00:31<00:00, 24.33it/s, loss=0.205, v_num=20, train_loss_step=0.148, val_loss_step=0.229, val_loss_epoch=0.227, val_amex_metric_epoch=0.778, train_loss_epoch=0.213, train_amex_metric_epoch=0.799]


Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=12-step=9334.ckpt


### Validation data shapes (91782, 13, 188) (91782,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=12-step=9334.ckpt


Validation DataLoader 0: 100%|██████████| 45/45 [00:00<00:00, 62.36it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
  val_amex_metric_epoch     0.7804443115311672
     val_loss_epoch         0.22660371661186218
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 20/20 [00:01<00:00, 13.17it/s]
Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=12-step=9334.ckpt


### Test data shapes (924621, 13, 188) (210,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=12-step=9334.ckpt


Predicting DataLoader 0: 100%|██████████| 226/226 [00:07<00:00, -62.42it/s]  
fold 5 0.7804443115311672


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


### Training data shapes (367131, 13, 188) (367131,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type        | Params
--------------------------------------------------
0 | model             | Transformer | 509 K 
1 | train_amex_metric | AmexMetric  | 0     
2 | val_amex_metric   | AmexMetric  | 0     
3 | loss_fn           | BCELoss     | 0     
--------------------------------------------------
509 K     Trainable params
0         Non-trainable params
509 K     Total params
2.037     Total estimated model params size (MB)


### Validation data shapes (91782, 13, 188) (91782,)
Epoch 24: 100%|██████████| 763/763 [00:31<00:00, 24.29it/s, loss=0.228, v_num=21, train_loss_step=0.412, val_loss_step=0.233, val_loss_epoch=0.226, val_amex_metric_epoch=0.782, train_loss_epoch=0.214, train_amex_metric_epoch=0.798] 


Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=24-step=17950.ckpt


### Validation data shapes (91782, 13, 188) (91782,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=24-step=17950.ckpt


Validation DataLoader 0: 100%|██████████| 45/45 [00:00<00:00, 62.40it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
  val_amex_metric_epoch     0.7816624693959793
     val_loss_epoch         0.22558875381946564
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 20/20 [00:01<00:00, 13.15it/s]
Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=24-step=17950.ckpt


### Test data shapes (924621, 13, 188) (210,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=24-step=17950.ckpt


Predicting DataLoader 0: 100%|██████████| 226/226 [00:07<00:00, -64.35it/s]  
fold 6 0.7816624693959793


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


### Training data shapes (367131, 13, 188) (367131,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type        | Params
--------------------------------------------------
0 | model             | Transformer | 509 K 
1 | train_amex_metric | AmexMetric  | 0     
2 | val_amex_metric   | AmexMetric  | 0     
3 | loss_fn           | BCELoss     | 0     
--------------------------------------------------
509 K     Trainable params
0         Non-trainable params
509 K     Total params
2.037     Total estimated model params size (MB)


### Validation data shapes (91782, 13, 188) (91782,)
Epoch 24: 100%|██████████| 763/763 [00:31<00:00, 24.39it/s, loss=0.205, v_num=22, train_loss_step=0.0408, val_loss_step=0.229, val_loss_epoch=0.227, val_amex_metric_epoch=0.778, train_loss_epoch=0.215, train_amex_metric_epoch=0.797]


Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=21-step=15796.ckpt


### Validation data shapes (91782, 13, 188) (91782,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=21-step=15796.ckpt


Validation DataLoader 0: 100%|██████████| 45/45 [00:00<00:00, 60.20it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
  val_amex_metric_epoch     0.7793279794133677
     val_loss_epoch         0.22645488381385803
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 20/20 [00:01<00:00, 13.10it/s]
Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=21-step=15796.ckpt


### Test data shapes (924621, 13, 188) (210,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=21-step=15796.ckpt


Predicting DataLoader 0: 100%|██████████| 226/226 [00:08<00:00, -58.27it/s]  
fold 7 0.7793279794133677


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


### Training data shapes (367131, 13, 188) (367131,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type        | Params
--------------------------------------------------
0 | model             | Transformer | 509 K 
1 | train_amex_metric | AmexMetric  | 0     
2 | val_amex_metric   | AmexMetric  | 0     
3 | loss_fn           | BCELoss     | 0     
--------------------------------------------------
509 K     Trainable params
0         Non-trainable params
509 K     Total params
2.037     Total estimated model params size (MB)


### Validation data shapes (91782, 13, 188) (91782,)
Epoch 24: 100%|██████████| 763/763 [00:31<00:00, 24.30it/s, loss=0.209, v_num=23, train_loss_step=0.139, val_loss_step=0.230, val_loss_epoch=0.228, val_amex_metric_epoch=0.778, train_loss_epoch=0.213, train_amex_metric_epoch=0.800] 


Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=19-step=14360.ckpt


### Validation data shapes (91782, 13, 188) (91782,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=19-step=14360.ckpt


Validation DataLoader 0: 100%|██████████| 45/45 [00:00<00:00, 66.45it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
  val_amex_metric_epoch     0.7793330949803576
     val_loss_epoch         0.22663722932338715
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 20/20 [00:01<00:00, 12.96it/s]
Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=19-step=14360.ckpt


### Test data shapes (924621, 13, 188) (210,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=19-step=14360.ckpt


Predicting DataLoader 0: 100%|██████████| 226/226 [00:07<00:00, -63.60it/s]  
fold 8 0.7793330949803576


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


### Training data shapes (367131, 13, 188) (367131,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type        | Params
--------------------------------------------------
0 | model             | Transformer | 509 K 
1 | train_amex_metric | AmexMetric  | 0     
2 | val_amex_metric   | AmexMetric  | 0     
3 | loss_fn           | BCELoss     | 0     
--------------------------------------------------
509 K     Trainable params
0         Non-trainable params
509 K     Total params
2.037     Total estimated model params size (MB)


### Validation data shapes (91782, 13, 188) (91782,)
Epoch 24: 100%|██████████| 763/763 [00:31<00:00, 24.48it/s, loss=0.206, v_num=24, train_loss_step=0.165, val_loss_step=0.231, val_loss_epoch=0.228, val_amex_metric_epoch=0.778, train_loss_epoch=0.214, train_amex_metric_epoch=0.797]


Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=17-step=12924-v4.ckpt


### Validation data shapes (91782, 13, 188) (91782,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=17-step=12924-v4.ckpt


Validation DataLoader 0: 100%|██████████| 45/45 [00:00<00:00, 62.88it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
  val_amex_metric_epoch     0.7809270582589025
     val_loss_epoch         0.2268919199705124
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 20/20 [00:01<00:00, 13.06it/s]
Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=17-step=12924-v4.ckpt


### Test data shapes (924621, 13, 188) (210,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=17-step=12924-v4.ckpt


Predicting DataLoader 0: 100%|██████████| 226/226 [00:07<00:00, -63.78it/s]  


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


fold 9 0.7809270582589025


HPU available: False, using: 0 HPUs


### Training data shapes (367131, 13, 188) (367131,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type        | Params
--------------------------------------------------
0 | model             | Transformer | 509 K 
1 | train_amex_metric | AmexMetric  | 0     
2 | val_amex_metric   | AmexMetric  | 0     
3 | loss_fn           | BCELoss     | 0     
--------------------------------------------------
509 K     Trainable params
0         Non-trainable params
509 K     Total params
2.037     Total estimated model params size (MB)


### Validation data shapes (91782, 13, 188) (91782,)
Epoch 24: 100%|██████████| 763/763 [00:31<00:00, 24.09it/s, loss=0.223, v_num=25, train_loss_step=0.356, val_loss_step=0.232, val_loss_epoch=0.227, val_amex_metric_epoch=0.779, train_loss_epoch=0.212, train_amex_metric_epoch=0.802]


Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=18-step=13642-v1.ckpt


### Validation data shapes (91782, 13, 188) (91782,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=18-step=13642-v1.ckpt


Validation DataLoader 0: 100%|██████████| 45/45 [00:00<00:00, 60.84it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
  val_amex_metric_epoch      0.780782433123992
     val_loss_epoch         0.22553186118602753
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


100%|██████████| 20/20 [00:01<00:00, 12.96it/s]
Restoring states from the checkpoint path at /home/cairs/code/amex/transformer/ckpt/epoch=18-step=13642-v1.ckpt


### Test data shapes (924621, 13, 188) (210,)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /home/cairs/code/amex/transformer/ckpt/epoch=18-step=13642-v1.ckpt


Predicting DataLoader 0: 100%|██████████| 226/226 [00:08<00:00, -60.61it/s]  
fold 10 0.780782433123992


In [8]:
# import pickle

# a = [val_metrics, outputs]

# with open('onecycle.pickle', 'wb') as handle:
#     pickle.dump(a, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [10]:
# import numpy as np
# np.mean(val_metrics)

0.7802671094268644

In [None]:
# import pickle

# a = [val_metrics, outputs]

# with open('onecycle.pickle', 'wb') as handle:
#     pickle.dump(a, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [4]:
# import pickle
# with open('onecycle.pickle', 'rb') as a:
#     a = pickle.load(a)
#     val_metrics = a[0]
#     outputs = a[1]

In [13]:
# import pandas as pd
# for i in range(10):
#     df = pd.DataFrame(outputs[i])
#     df2 = pd.read_csv('submission.csv')
#     df2['prediction']=df.values
#     df2.to_csv(f'results/onecycle_prediction_{i}.csv', index=False)