In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

In [2]:
import torch
import albumentations

import numpy as np
import pandas as pd

import torch.nn as nn
from sklearn import metrics
from sklearn import model_selection
from torch.nn import functional as F
from tqdm import tqdm
# from wtfml.utils import EarlyStopping
# from wtfml.engine import Engine
# from wtfml.data_loaders.image import ClassificationLoader
from efficientnet_pytorch import EfficientNet
import pretrainedmodels

from albumentations.pytorch import ToTensor
from torchvision import transforms

### Porting utilities from wtfml for our use

In [3]:
import torch

import numpy as np

from PIL import Image
from PIL import ImageFile


ImageFile.LOAD_TRUNCATED_IMAGES = True


class ClassificationLoader:
    """
    Map-style image classification dataset.

    Every item is read from disk on access, forced to 3-channel RGB,
    optionally resized and augmented, and returned channel-first.

    :param image_paths: sequence of image file paths
    :param targets: sequence of labels, indexed in parallel with ``image_paths``
    :param resize: ``(height, width)`` tuple, or None to keep the native size
    :param augmentations: optional callable invoked as ``augmentations(image=array)``
        that returns a dict holding the result under the ``"image"`` key
    """

    def __init__(self, image_paths, targets, resize, augmentations=None):
        self.image_paths = image_paths
        self.targets = targets
        self.resize = resize
        self.augmentations = augmentations

    def __len__(self):
        """Number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """
        Load sample ``item``.

        :return: dict with ``"image"`` (float tensor, channels first) and
            ``"targets"`` (long tensor)
        """
        # convert("RGB") guarantees an (H, W, 3) array. Grayscale, palette or
        # RGBA files would otherwise break the (2, 0, 1) transpose below or
        # produce an unexpected number of channels.
        image = Image.open(self.image_paths[item]).convert("RGB")
        targets = self.targets[item]
        if self.resize is not None:
            # PIL expects (width, height), hence the swapped indices.
            image = image.resize(
                (self.resize[1], self.resize[0]), resample=Image.BILINEAR
            )
        image = np.array(image)
        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]
        # HWC -> CHW, the layout the convolutional backbone consumes.
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        return {
            "image": torch.tensor(image, dtype=torch.float),
            "targets": torch.tensor(targets, dtype=torch.long),
        }


In [4]:
class AverageMeter:
    """
    Running tracker: keeps the most recent value and the weighted mean so far
    """

    def __init__(self):
        # A fresh meter is simply a meter that has just been reset.
        self.reset()

    def reset(self):
        """Forget everything recorded so far."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, weighted by ``n`` observations."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [5]:
import torch
import numpy as np


class EarlyStopping:
    """
    Stop training once the monitored score stops improving, checkpointing
    the model each time a new best score is seen.

    :param patience: number of consecutive non-improving calls tolerated
        before ``early_stop`` is set to True
    :param mode: ``"max"`` when a larger score is better, ``"min"`` when a
        smaller score is better
    :param delta: minimum increase over the best score that counts as an
        improvement
    """

    def __init__(self, patience=7, mode="max", delta=0.0001):
        self.patience = patience
        self.counter = 0
        self.mode = mode
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        # np.inf (lower case) exists in every NumPy release; the np.Inf alias
        # was removed in NumPy 2.0.
        if self.mode == "min":
            self.val_score = np.inf
        else:
            self.val_score = -np.inf

    def __call__(self, epoch_score, model, model_path):
        """
        Register the score of the epoch that just finished.

        :param epoch_score: value of the monitored metric
        :param model: module whose ``state_dict()`` is saved on improvement
        :param model_path: destination file for the checkpoint
        """
        # Internally the comparison is always "bigger is better", so a
        # minimised metric is negated.
        if self.mode == "min":
            score = -1.0 * epoch_score
        else:
            score = np.copy(epoch_score)

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(epoch_score, model, model_path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(
                "EarlyStopping counter: {} out of {}".format(
                    self.counter, self.patience
                )
            )
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(epoch_score, model, model_path)
            self.counter = 0

    def save_checkpoint(self, epoch_score, model, model_path):
        """
        Save ``model.state_dict()`` to ``model_path`` unless the score is
        infinite or NaN, then remember ``epoch_score`` as the last score.
        """
        # np.isfinite rejects +/-inf and every NaN. A membership test against
        # np.nan only matches that exact object, because NaN != NaN.
        if np.isfinite(epoch_score):
            print(
                "Validation score improved ({} --> {}). Saving model!".format(
                    self.val_score, epoch_score
                )
            )
            torch.save(model.state_dict(), model_path)
        self.val_score = epoch_score

In [6]:
try:
    from apex import amp
    _apex_available = True
except ImportError:
    _apex_available = False

# Nothing in this cell sets up the TPU helpers (`xm`, `_xla_available`).
# Honour a flag defined by an earlier cell; otherwise treat XLA as unavailable
# so that `use_tpu=True` fails with the intended message instead of NameError.
try:
    _xla_available
except NameError:
    _xla_available = False


class Engine:
    """
    Stateless train / evaluate / predict loops.

    The model is called as ``model(**batch)`` and must return a
    ``(predictions, loss)`` pair. Each batch is a dict of tensors; its values
    are moved to ``device`` in place before the call.
    """

    @staticmethod
    def train(
        data_loader,
        model,
        optimizer,
        device,
        scheduler=None,
        accumulation_steps=1,
        use_tpu=False,
        fp16=False,
    ):
        """
        Run one pass over ``data_loader`` and update ``model``.

        :param data_loader: iterable of dict batches exposing ``batch_size``
        :param model: module returning ``(predictions, loss)``
        :param optimizer: optimizer stepping the model parameters
        :param device: device the batch tensors are moved to
        :param scheduler: optional scheduler, stepped after every optimizer step
        :param accumulation_steps: number of batches whose gradients are
            summed before an optimizer step (forced to 1 when ``fp16`` is set);
            the loss is not rescaled, and batches left over after the last
            full window are never stepped
        :param use_tpu: step through ``xm.optimizer_step`` instead of
            ``optimizer.step``
        :param fp16: scale the loss with apex ``amp`` before backward
        :return: average training loss, weighted by ``data_loader.batch_size``
        :raises Exception: on an unavailable or unsupported TPU / fp16 setup
        """
        if use_tpu and not _xla_available:
            raise Exception(
                "You want to use TPUs but you dont have pytorch_xla installed"
            )
        if fp16 and not _apex_available:
            raise Exception("You want to use fp16 but you dont have apex installed")
        if fp16 and use_tpu:
            raise Exception("Apex fp16 is not available when using TPUs")
        if fp16:
            accumulation_steps = 1
        losses = AverageMeter()
        model.train()
        # Start from clean gradients, then clear them after EVERY optimizer
        # step. Previously the clear was skipped after the first batch, so
        # batch 0's gradient was applied a second time in the next update.
        optimizer.zero_grad()
        tk0 = tqdm(data_loader, total=len(data_loader), disable=use_tpu)
        for b_idx, data in enumerate(tk0):
            for key, value in data.items():
                data[key] = value.to(device)
            _, loss = model(**data)

            if not use_tpu:
                with torch.set_grad_enabled(True):
                    if fp16:
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()
                    if (b_idx + 1) % accumulation_steps == 0:
                        optimizer.step()
                        if scheduler is not None:
                            scheduler.step()
                        optimizer.zero_grad()
            else:
                loss.backward()
                xm.optimizer_step(optimizer)
                if scheduler is not None:
                    scheduler.step()
                optimizer.zero_grad()

            losses.update(loss.item(), data_loader.batch_size)
            tk0.set_postfix(loss=losses.avg)
        return losses.avg

    @staticmethod
    def evaluate(data_loader, model, device, use_tpu=False):
        """
        Score ``model`` on ``data_loader`` without tracking gradients.

        :param use_tpu: only used to disable the progress bar
        :return: ``(list of per-batch CPU predictions, average loss)``
        """
        losses = AverageMeter()
        final_predictions = []
        model.eval()
        with torch.no_grad():
            tk0 = tqdm(data_loader, total=len(data_loader), disable=use_tpu)
            for data in tk0:
                for key, value in data.items():
                    data[key] = value.to(device)
                predictions, loss = model(**data)
                predictions = predictions.cpu()
                losses.update(loss.item(), data_loader.batch_size)
                final_predictions.append(predictions)
                tk0.set_postfix(loss=losses.avg)
        return final_predictions, losses.avg

    @staticmethod
    def predict(data_loader, model, device, use_tpu=False):
        """
        Collect model predictions for ``data_loader``; the loss is discarded.

        :param use_tpu: only used to disable the progress bar
        :return: list of per-batch CPU predictions
        """
        model.eval()
        final_predictions = []
        with torch.no_grad():
            tk0 = tqdm(data_loader, total=len(data_loader), disable=use_tpu)
            for data in tk0:
                for key, value in data.items():
                    data[key] = value.to(device)
                predictions, _ = model(**data)
                predictions = predictions.cpu()
                final_predictions.append(predictions)
        return final_predictions

In [7]:
class Net(nn.Module):
    """
    Single-logit binary classifier built on top of a backbone network.

    The backbone's ``last_linear`` attribute is replaced IN PLACE with a
    fresh one-output linear layer, so the ``arch`` object passed in is
    modified.

    :param arch: backbone module exposing a ``last_linear`` attribute
    :param in_features: input width of the replacement head. The default of
        204800 is the value previously hard-coded here; presumably it matches
        the flattened backbone features for this notebook's 512x512 inputs --
        TODO confirm when changing the architecture or the image size.
    """

    def __init__(self, arch, in_features=204800):
        super(Net, self).__init__()
        self.arch = arch
        self.arch.last_linear = nn.Linear(
            in_features=in_features, out_features=1, bias=True
        )
        # Built once instead of on every forward pass; it holds no parameters.
        self.criterion = nn.BCEWithLogitsLoss()

    def forward(self, image, targets):
        """
        No sigmoid in forward because we are going to use BCEWithLogitsLoss
        Which applies sigmoid for us when calculating a loss

        :param image: batch of images fed straight to the backbone
        :param targets: labels, reshaped to ``(-1, 1)`` and cast to float
        :return: ``(logits, loss)``
        """
        ### https://github.com/clovaai/CutMix-PyTorch/blob/master/train.py
        output = self.arch(image)
        loss = self.criterion(output, targets.view(-1, 1).float())
        return output, loss

In [8]:
# arch = EfficientNet.from_pretrained('efficientnet-b3')
import pretrainedmodels
# arch = pretrainedmodels.__dict__['se_resnet152'](num_classes=1000, pretrained='imagenet')
# arch
# in_features  = arch.last_linear.in_features
# arch.last_linear = nn.Linear(in_features=in_features, out_features=1, bias=True)
# arch
# arch._fc.in_features

In [9]:
# arch.last_linear.in_features

In [10]:
import pretrainedmodels

def train(fold,bs,epochs,fp16,sz,arch='se_resnet152',debug=False):
    """
    Train and validate one cross-validation fold, checkpointing on AUC gains.

    :param fold: rows whose ``fold`` column equals this value form the
        validation set; all remaining rows are used for training
    :param bs: training batch size (validation uses ``bs // 2``, at least 1)
    :param epochs: maximum number of epochs; early stopping may end sooner
    :param fp16: wrap model and optimizer with apex ``amp`` (opt level O1)
    :param sz: side length for a square resize, or None to keep native size
    :param arch: key into ``pretrainedmodels.__dict__`` naming the backbone
    :param debug: restrict both splits to their first 250 images
    :raises Exception: if ``fp16`` is requested but apex is not installed
    """
    import os

    if sz is not None:
        sz = (sz,sz)

    # `arch` is rebound to the backbone module below; keep its name for the
    # checkpoint file name.
    _n = arch
    training_data_path = '../input/512x512-dataset-melanoma/512x512-dataset-melanoma/'
    df = pd.read_csv('../input/folds_08062020.csv')
    device = "cuda"
    train_bs = bs
    # bs // 2 is 0 for bs == 1, which DataLoader rejects.
    valid_bs = max(1, bs//2)

    df_train = df[df.fold != fold].reset_index(drop=True)
    df_valid = df[df.fold == fold].reset_index(drop=True)

    arch = pretrainedmodels.__dict__[arch](num_classes=1000, pretrained='imagenet')
    model = Net(arch=arch)  # New model for each fold
    model = model.to(device)
#     if fold == 0:
#         print("Loading existing model version for fold 0")
# #         model.load_state_dict(torch.load(bstModel))
#         model.load_state_dict(torch.load('../models/model_arch_densenet121_sz_512_fold_0_epoch_3_auc_92.53.bin'))

    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    train_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
            albumentations.CoarseDropout(),
            albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=30),
            albumentations.Flip(p=0.5)
        ]
    )

    valid_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
        ]
    )

    # Slicing with None keeps everything, so one code path serves both the
    # debug run (first 250 samples) and the full run.
    limit = 250 if debug else None

    train_images = df_train.image_id.values.tolist()[:limit]
    train_images = [os.path.join(training_data_path, i + ".jpg") for i in train_images]
    train_targets = df_train.target.values[:limit]

    valid_images = df_valid.image_id.values.tolist()[:limit]
    valid_images = [os.path.join(training_data_path, i + ".jpg") for i in valid_images]
    valid_targets = df_valid.target.values[:limit]

    train_dataset = ClassificationLoader(
        image_paths=train_images,
        targets=train_targets,
        resize=sz,
        augmentations=train_aug,
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=train_bs, shuffle=True, num_workers=4
    )

    valid_dataset = ClassificationLoader(
        image_paths=valid_images,
        targets=valid_targets,
        resize=sz,
        augmentations=valid_aug,
    )

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_bs, shuffle=False, num_workers=4
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=3,
        threshold=0.001,
        mode="max"
    )

    # Only touch apex when mixed precision was requested. Previously
    # amp.initialize ran unconditionally, which raised NameError without apex
    # and wrapped the model even for fp16=False.
    if fp16:
        if not _apex_available:
            raise Exception("You want to use fp16 but you dont have apex installed")
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
    es = EarlyStopping(patience=5, mode="max")

    # Image size recorded in the checkpoint name; 512 when no resize is applied.
    if sz is not None:
        ss = sz[0]
    else:
        ss = 512

    # torch.save does not create missing parent directories.
    os.makedirs("../models", exist_ok=True)

    for epoch in range(epochs):
        train_loss = Engine.train(train_loader, model, optimizer, device=device,fp16=fp16)
        predictions, valid_loss = Engine.evaluate(
            valid_loader, model, device=device
        )
        # Per-batch prediction tensors -> one flat vector aligned with valid_targets.
        predictions = np.vstack((predictions)).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)

        es(auc, model, model_path= "../models/model_arch_{}_sz_{}_fold_{}_epoch_{}_auc_{}.bin".format(_n,ss,fold,epoch,round(auc*100,2)))
        if es.early_stop:
            print("Early stopping")
            break

In [11]:
# Settings shared by every fold.
e = 30                   # maximum number of epochs per fold
debug = False            # True trains on a small subset only
bs = 8                   # training batch size
mtype = 'se_resnet152'   # backbone name passed to train()

# Train folds 0-4 one after another, with fp16 enabled and no resizing.
for fold in range(5):
    train(fold, bs, e, True, None, mtype, debug=debug)

  0%|          | 0/5831 [00:00<?, ?it/s]

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


  0%|          | 3/5831 [00:01<1:00:27,  1.61it/s, loss=1.65] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 17%|█▋        | 1010/5831 [06:06<27:40,  2.90it/s, loss=0.275]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 29%|██▉       | 1687/5831 [10:10<22:46,  3.03it/s, loss=0.264]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 5831/5831 [35:17<00:00,  2.75it/s, loss=0.231]
100%|██████████| 2953/2953 [02:30<00:00, 19.66it/s, loss=0.203] 
  0%|          | 0/5831 [00:00<?, ?it/s]

Epoch = 0, AUC = 0.8860144358847052
Validation score improved (-inf --> 0.8860144358847052). Saving model!


 33%|███▎      | 1933/5831 [11:40<23:12,  2.80it/s, loss=0.217]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 81%|████████  | 4713/5831 [28:30<06:15,  2.98it/s, loss=0.212]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5831/5831 [35:16<00:00,  2.76it/s, loss=0.211]
 70%|██████▉   | 2062/2953 [01:45<00:45, 19.78it/s, loss=0.11]  IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 98%|█████████▊| 5739/5831 [34:19<00:32,  2.81it/s, loss=0.2]  IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 53%|█████▎    | 3102/5831 [18:49<15:39,  2.90it/s, loss=0.193]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 74%|███████▍  | 4321/5831 [26:12<09:03,  2.78it/s, loss=0.193]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 44%|████▎     | 2542/5831 [15:25<19:19,  2.84it/s, loss=0.182]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 49%|████▉     | 2868/5831 [17:24<18:35,  2.66it/s, loss=0.182]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 5831/5831 [35:22<00:00,  2.75it/s, loss=0.186]
100%|██████████| 2953/2953 [02:28<00:00, 19.88it/s, loss=0.186] 
  0%|          | 0/5831 [00:00<?, ?it/s]

Epoch = 4, AUC = 0.9124353348174961
EarlyStopping counter: 1 out of 5


 22%|██▏       | 1258/5831 [07:36<25:56,  2.94it/s, loss=0.176]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 26%|██▌       | 1503/5831 [09:05<24:52,  2.90it/s, loss=0.174]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 54%|█████▍    | 3177/5831 [19:14<15:01,  2.95it/s, loss=0.182]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 5831/5831 [35:22<00:00,  2.75it/s, loss=0.182]
100%|██████████| 2953/2953 [02:27<00:00, 20.07it/s, loss=0.182] 
  0%|          | 0/5831 [00:00<?, ?it/s]

Epoch = 5, AUC = 0.9160406578720239
Validation score improved (0.9127985453781855 --> 0.9160406578720239). Saving model!


 59%|█████▉    | 3428/5831 [20:50<13:07,  3.05it/s, loss=0.182]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 81%|████████  | 4730/5831 [28:44<06:22,  2.88it/s, loss=0.179]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 88%|████████▊ | 5143/5831 [31:12<03:39,  3.13it/s, loss=0.179]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 5831/5831 [35:20<00:00,  2.75it/s, loss=0.176]
100%|██████████| 2953/2953 [02:27<00:00, 20.03it/s, loss=0.188] 
  0%|          | 0/5831 [00:00<?, ?it/s]

Epoch = 6, AUC = 0.9110349454472331
EarlyStopping counter: 1 out of 5


 68%|██████▊   | 3939/5831 [23:59<10:57,  2.88it/s, loss=0.173]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 5831/5831 [35:27<00:00,  2.74it/s, loss=0.172]
100%|██████████| 2953/2953 [02:29<00:00, 19.72it/s, loss=0.189] 
  0%|          | 0/5831 [00:00<?, ?it/s]

Epoch = 7, AUC = 0.9177776532077668
Validation score improved (0.9160406578720239 --> 0.9177776532077668). Saving model!


 38%|███▊      | 2210/5831 [13:26<20:18,  2.97it/s, loss=0.17] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 77%|███████▋  | 4483/5831 [27:16<07:49,  2.87it/s, loss=0.169]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5831/5831 [35:26<00:00,  2.74it/s, loss=0.168]
100%|██████████| 2953/2953 [02:30<00:00, 19.64it/s, loss=0.175] 
  0%|          | 0/5831 [00:00<?, ?it/s]

Epoch = 8, AUC = 0.9230276118327274
Validation score improved (0.9177776532077668 --> 0.9230276118327274). Saving model!


 16%|█▌        | 922/5831 [05:36<27:34,  2.97it/s, loss=0.17] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 42%|████▏     | 2448/5831 [14:51<18:30,  3.05it/s, loss=0.167]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 80%|████████  | 4684/5831 [28:28<06:17,  3.04it/s, loss=0.164]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 89%|████████▊ | 5162/5831 [31:22<03:37,  3.08it/s, loss=0.165]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 5831/5831 [35:26<00:00,  2.74it/s, loss=0.165]
100%|██████████| 2953/2953 [02:28<00:00, 19.84it/s, loss=0.184] 
  0%|          | 0/5831 [00:00<?, ?it/s]

Epoch = 9, AUC = 0.9114190659740574
EarlyStopping counter: 1 out of 5


 79%|███████▉  | 4629/5831 [28:13<06:36,  3.03it/s, loss=0.161]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 5831/5831 [35:32<00:00,  2.73it/s, loss=0.162]
100%|██████████| 2953/2953 [02:32<00:00, 19.32it/s, loss=0.173] 


Epoch = 10, AUC = 0.9238955978737678
Validation score improved (0.9230276118327274 --> 0.9238955978737678). Saving model!


  1%|          | 69/5831 [00:26<33:50,  2.84it/s, loss=0.185]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 45%|████▌     | 2645/5831 [16:10<17:13,  3.08it/s, loss=0.163]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 5831/5831 [35:37<00:00,  2.73it/s, loss=0.161]
100%|██████████| 2953/2953 [02:33<00:00, 19.22it/s, loss=0.177] 


Epoch = 11, AUC = 0.9252340581528403
Validation score improved (0.9238955978737678 --> 0.9252340581528403). Saving model!


 48%|████▊     | 2816/5831 [17:10<17:20,  2.90it/s, loss=0.15] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 52%|█████▏    | 3037/5831 [18:31<16:43,  2.78it/s, loss=0.151]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 5831/5831 [35:36<00:00,  2.73it/s, loss=0.154]
100%|██████████| 2953/2953 [02:31<00:00, 19.46it/s, loss=0.184] 
  0%|          | 0/5831 [00:00<?, ?it/s]

Epoch = 12, AUC = 0.9233273361770887
EarlyStopping counter: 1 out of 5


 46%|████▌     | 2658/5831 [16:12<17:14,  3.07it/s, loss=0.151]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 89%|████████▉ | 5189/5831 [31:40<03:44,  2.87it/s, loss=0.15] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5831/5831 [35:33<00:00,  2.73it/s, loss=0.151]
100%|██████████| 2953/2953 [02:21<00:00, 20.81it/s, loss=0.185] 
  0%|          | 0/5831 [00:00<?, ?it/s]

Epoch = 13, AUC = 0.9223950631125064
EarlyStopping counter: 2 out of 5


 23%|██▎       | 1369/5831 [08:10<23:36,  3.15it/s, loss=0.153]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 60%|██████    | 3505/5831 [20:53<13:13,  2.93it/s, loss=0.147]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 78%|███████▊  | 4538/5831 [27:03<07:12,  2.99it/s, loss=0.148]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 5831/5831 [34:39<00:00,  2.80it/s, loss=0.148]
 72%|███████▏  | 2133/2953 [01:39<00:41, 19.89it/s, loss=0.0998]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 89%|████████▊ | 5174/5831 [30:38<03:35,  3.06it/s, loss=0.145]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5831/5831 [34:34<00:00,  2.81it/s, loss=0.145]
 10%|▉         | 286/2953 [00:14<02:16, 19.55it/s, loss=0.048] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 63%|██████▎   | 3694/5831 [22:04<11:24,  3.12it/s, loss=0.143]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 76%|███████▌  | 4425/5831 [26:27<08:19,  2.81it/s, loss=0.143]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 30%|███       | 1762/5831 [10:30<23:52,  2.84it/s, loss=0.135]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 55%|█████▌    | 3215/5831 [19:10<15:35,  2.80it/s, loss=0.134]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 35%|███▌      | 2055/5831 [12:16<23:16,  2.70it/s, loss=0.137]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 2953/2953 [02:23<00:00, 20.55it/s, loss=0.18]  
  0%|          | 0/5831 [00:00<?, ?it/s]

Epoch = 18, AUC = 0.9218033555601823
EarlyStopping counter: 3 out of 5


  7%|▋         | 381/5831 [02:17<32:14,  2.82it/s, loss=0.131]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 5831/5831 [34:46<00:00,  2.79it/s, loss=0.133]
 59%|█████▉    | 1742/2953 [01:24<00:57, 21.24it/s, loss=0.0738]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 98%|█████████▊| 5707/5831 [34:01<00:43,  2.83it/s, loss=0.132]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the clie

Epoch = 22, AUC = 0.9280901370803993
EarlyStopping counter: 3 out of 5


 23%|██▎       | 1324/5831 [07:54<26:53,  2.79it/s, loss=0.126]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 98%|█████████▊| 2905/2953 [02:21<00:02, 21.04it/s, loss=0.183] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 5831/5831 [34:46<00:00,  2.79it/s, loss=0.102]
 31%|███       | 920/2953 [00:45<01:39, 20.52it/s, loss=0.0645]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the cli

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 52%|█████▏    | 3069/5846 [19:50<17:23,  2.66it/s, loss=0.203]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 23%|██▎       | 1369/5846 [08:52<29:19,  2.55it/s, loss=0.198]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 27%|██▋       | 1601/5846 [10:22<27:30,  2.57it/s, loss=0.198]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 98%|█████████▊| 2877/2923 [02:38<00:02, 17.10it/s, loss=0.194] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 5846/5846 [37:49<00:00,  2.58it/s, loss=0.19] 
100%|██████████| 2923/2923 [02:40<00:00, 18.21it/s, loss=0.189] 


Epoch = 3, AUC = 0.907318993842418
Validation score improved (0.8972234404260233 --> 0.907318993842418). Saving model!


 29%|██▉       | 1701/5846 [10:53<25:25,  2.72it/s, loss=0.182]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 39%|███▉      | 2266/5846 [14:30<20:42,  2.88it/s, loss=0.181]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 46%|████▌     | 2680/5846 [17:09<20:14,  2.61it/s, loss=0.183]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 2923/2923 [02:37<00:00, 18.50it/s, loss=0.187]
  0%|          | 0/5846 [00:00<?, ?it/s]

Epoch = 4, AUC = 0.907588665293234
Validation score improved (0.907318993842418 --> 0.907588665293234). Saving model!


 23%|██▎       | 1318/5846 [08:28<27:55,  2.70it/s, loss=0.18] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 53%|█████▎    | 3099/5846 [19:55<15:35,  2.94it/s, loss=0.181]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 5846/5846 [37:34<00:00,  2.59it/s, loss=0.178]
100%|██████████| 2923/2923 [02:34<00:00, 18.89it/s, loss=0.192] 
  0%|          | 0/5846 [00:00<?, ?it/s]

Epoch = 5, AUC = 0.9038951679101492
EarlyStopping counter: 1 out of 5


  3%|▎         | 185/5846 [01:10<31:46,  2.97it/s, loss=0.181]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


  3%|▎         | 194/5846 [01:13<31:39,  2.98it/s, loss=0.249]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 75%|███████▍  | 4370/5846 [27:39<09:12,  2.67it/s, loss=0.175]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 5846/5846 [37:00<00:00,  2.63it/s, loss=0.176]
100%|██████████| 2923/2923 [02:34<00:00, 18.89it/s, loss=0.188] 
  0%|          | 0/5846 [00:00<?, ?it/s]

Epoch = 6, AUC = 0.9053523632264541
EarlyStopping counter: 2 out of 5


 33%|███▎      | 1952/5846 [12:28<22:10,  2.93it/s, loss=0.167]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 5846/5846 [37:26<00:00,  2.60it/s, loss=0.169]
100%|██████████| 2923/2923 [02:33<00:00, 18.98it/s, loss=0.186] 
  0%|          | 0/5846 [00:00<?, ?it/s]

Epoch = 7, AUC = 0.9117074085337271
Validation score improved (0.907588665293234 --> 0.9117074085337271). Saving model!


 15%|█▍        | 853/5846 [05:26<28:14,  2.95it/s, loss=0.158] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 55%|█████▍    | 3196/5846 [20:29<16:53,  2.61it/s, loss=0.162]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5846/5846 [37:25<00:00,  2.60it/s, loss=0.165]
100%|██████████| 2923/2923 [02:35<00:00, 18.74it/s, loss=0.182] 
  0%|          | 0/5846 [00:00<?, ?it/s]

Epoch = 8, AUC = 0.9108930894690028
EarlyStopping counter: 1 out of 5


  3%|▎         | 194/5846 [01:14<35:12,  2.68it/s, loss=0.181] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 43%|████▎     | 2507/5846 [16:01<20:42,  2.69it/s, loss=0.164]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 61%|██████▏   | 3593/5846 [22:56<14:44,  2.55it/s, loss=0.162]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 5846/5846 [37:17<00:00,  2.61it/s, loss=0.159]
100%|██████████| 2923/2923 [02:24<00:00, 20.25it/s, loss=0.195] 
  0%|          | 0/5846 [00:00<?, ?it/s]

Epoch = 9, AUC = 0.9094436682999998
EarlyStopping counter: 2 out of 5


 28%|██▊       | 1647/5846 [10:02<22:41,  3.08it/s, loss=0.155]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 57%|█████▋    | 3349/5846 [20:24<13:39,  3.05it/s, loss=0.156]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 61%|██████    | 3565/5846 [21:43<12:26,  3.05it/s, loss=0.156]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0


100%|██████████| 5846/5846 [36:00<00:00,  2.71it/s, loss=0.156]
100%|██████████| 2923/2923 [02:33<00:00, 18.99it/s, loss=0.184] 
  0%|          | 0/5846 [00:00<?, ?it/s]

Epoch = 10, AUC = 0.9176088552110796
Validation score improved (0.9117074085337271 --> 0.9176088552110796). Saving model!


100%|██████████| 5846/5846 [37:04<00:00,  2.63it/s, loss=0.155]
100%|██████████| 2923/2923 [02:35<00:00, 18.78it/s, loss=0.174] 
  0%|          | 0/5846 [00:00<?, ?it/s]

Epoch = 11, AUC = 0.9216993371396167
Validation score improved (0.9176088552110796 --> 0.9216993371396167). Saving model!


 11%|█         | 640/5846 [04:04<29:12,  2.97it/s, loss=0.153] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 44%|████▍     | 2562/5846 [16:19<18:28,  2.96it/s, loss=0.152]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 82%|████████▏ | 4774/5846 [30:25<06:14,  2.86it/s, loss=0.151]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 5846/5846 [37:15<00:00,  2.62it/s, loss=0.151]
100%|██████████| 2923/2923 [02:35<00:00, 18.81it/s, loss=0.19]  
  0%|          | 0/5846 [00:00<?, ?it/s]

Epoch = 12, AUC = 0.9158773239555775
EarlyStopping counter: 1 out of 5


 59%|█████▉    | 3449/5846 [21:59<13:37,  2.93it/s, loss=0.144]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 99%|█████████▉| 5776/5846 [36:48<00:27,  2.55it/s, loss=0.147]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5846/5846 [37:15<00:00,  2.61it/s, loss=0.148]
100%|██████████| 2923/2923 [02:36<00:00, 18.74it/s, loss=0.176] 
  0%|          | 0/5846 [00:00<?, ?it/s]

Epoch = 13, AUC = 0.9236015337935324
Validation score improved (0.9216993371396167 --> 0.9236015337935324). Saving model!


 35%|███▌      | 2074/5846 [13:13<21:54,  2.87it/s, loss=0.14] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 89%|████████▉ | 5192/5846 [33:05<03:56,  2.77it/s, loss=0.143]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5846/5846 [37:15<00:00,  2.62it/s, loss=0.144]
100%|██████████| 2923/2923 [02:35<00:00, 18.78it/s, loss=0.182] 
  0%|          | 0/5846 [00:00<?, ?it/s]

Epoch = 14, AUC = 0.9174813134650518
EarlyStopping counter: 1 out of 5


  8%|▊         | 459/5846 [02:55<30:59,  2.90it/s, loss=0.138]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 34%|███▍      | 2013/5846 [12:49<22:06,  2.89it/s, loss=0.145]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 35%|███▍      | 2038/5846 [12:58<21:32,  2.95it/s, loss=0.146]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0


 38%|███▊      | 2236/5846 [14:14<20:23,  2.95it/s, loss=0.214]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 2048.0


100%|██████████| 5846/5846 [37:14<00:00,  2.62it/s, loss=0.18] 
100%|██████████| 2923/2923 [02:36<00:00, 18.71it/s, loss=0.178] 
  0%|          | 0/5846 [00:00<?, ?it/s]

Epoch = 15, AUC = 0.9206333186234272
EarlyStopping counter: 2 out of 5


100%|██████████| 5846/5846 [37:17<00:00,  2.61it/s, loss=0.137]
100%|██████████| 2923/2923 [02:36<00:00, 18.63it/s, loss=0.19]  
  0%|          | 0/5846 [00:00<?, ?it/s]

Epoch = 16, AUC = 0.9203491506744073
EarlyStopping counter: 3 out of 5


 14%|█▍        | 842/5846 [05:22<29:14,  2.85it/s, loss=0.133]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 53%|█████▎    | 3112/5846 [19:44<16:31,  2.76it/s, loss=0.133]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 94%|█████████▍| 5499/5846 [34:48<01:56,  2.97it/s, loss=0.135]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5846/5846 [36:59<00:00,  2.63it/s, loss=0.135]
100%|██████████| 2923/2923 [02:33<00:00, 19.03it/s, loss=0.218] 
  0%|          | 0/5846 [00:00<?, ?it/s]

Epoch = 17, AUC = 0.8979741486737076
EarlyStopping counter: 4 out of 5


 30%|██▉       | 1736/5846 [10:54<22:59,  2.98it/s, loss=0.109]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 64%|██████▍   | 3761/5846 [23:41<11:46,  2.95it/s, loss=0.113]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5846/5846 [36:50<00:00,  2.65it/s, loss=0.114]
100%|██████████| 2923/2923 [02:33<00:00, 19.08it/s, loss=0.188] 


Epoch = 18, AUC = 0.923330124592143
EarlyStopping counter: 5 out of 5
Early stopping


  0%|          | 0/5850 [00:00<?, ?it/s]

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


  0%|          | 4/5850 [00:02<55:19,  1.76it/s, loss=1.19]   

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


  1%|          | 39/5850 [00:16<34:36,  2.80it/s, loss=0.574]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


  3%|▎         | 161/5850 [01:05<34:46,  2.73it/s, loss=0.366]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 5850/5850 [39:27<00:00,  2.47it/s, loss=0.231]
100%|██████████| 2915/2915 [02:50<00:00, 17.07it/s, loss=0.211] 
  0%|          | 0/5850 [00:00<?, ?it/s]

Epoch = 0, AUC = 0.8754629824589819
Validation score improved (-inf --> 0.8754629824589819). Saving model!


 12%|█▏        | 711/5850 [04:46<30:28,  2.81it/s, loss=0.225] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 49%|████▊     | 2844/5850 [19:18<17:58,  2.79it/s, loss=0.212]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 72%|███████▏  | 4192/5850 [28:26<10:58,  2.52it/s, loss=0.212]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 5850/5850 [40:22<00:00,  2.41it/s, loss=0.211]
100%|██████████| 2915/2915 [03:09<00:00, 15.41it/s, loss=0.198] 
  0%|          | 0/5850 [00:00<?, ?it/s]

Epoch = 1, AUC = 0.8981074858799547
Validation score improved (0.8754629824589819 --> 0.8981074858799547). Saving model!


 31%|███▏      | 1842/5850 [13:10<27:33,  2.42it/s, loss=0.205]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 32%|███▏      | 1845/5850 [13:11<26:17,  2.54it/s, loss=0.206]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 5850/5850 [42:02<00:00,  2.32it/s, loss=0.203]
100%|██████████| 2915/2915 [03:04<00:00, 15.76it/s, loss=0.197] 
  0%|          | 0/5850 [00:00<?, ?it/s]

Epoch = 2, AUC = 0.8970320717652313
EarlyStopping counter: 1 out of 5


 26%|██▋       | 1539/5850 [10:51<32:10,  2.23it/s, loss=0.198]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 78%|███████▊  | 4576/5850 [31:43<08:09,  2.60it/s, loss=0.194]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 5850/5850 [40:26<00:00,  2.41it/s, loss=0.194]
100%|██████████| 2915/2915 [02:55<00:00, 16.58it/s, loss=0.194] 
  0%|          | 0/5850 [00:00<?, ?it/s]

Epoch = 3, AUC = 0.904991543012631
Validation score improved (0.8981074858799547 --> 0.904991543012631). Saving model!


 22%|██▏       | 1311/5850 [08:43<26:24,  2.87it/s, loss=0.193]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 22%|██▏       | 1315/5850 [08:45<25:35,  2.95it/s, loss=0.204]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 23%|██▎       | 1338/5850 [08:54<26:11,  2.87it/s, loss=0.253]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0


100%|██████████| 5850/5850 [39:22<00:00,  2.48it/s, loss=0.228]
100%|██████████| 2915/2915 [02:50<00:00, 17.06it/s, loss=0.189] 
  0%|          | 0/5850 [00:00<?, ?it/s]

Epoch = 4, AUC = 0.9097027444138764
Validation score improved (0.904991543012631 --> 0.9097027444138764). Saving model!


 83%|████████▎ | 4837/5850 [32:16<05:59,  2.81it/s, loss=0.183]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5850/5850 [38:59<00:00,  2.50it/s, loss=0.185]
100%|██████████| 2915/2915 [02:49<00:00, 17.19it/s, loss=0.184] 
  0%|          | 0/5850 [00:00<?, ?it/s]

Epoch = 5, AUC = 0.9134988789574884
Validation score improved (0.9097027444138764 --> 0.9134988789574884). Saving model!


 18%|█▊        | 1044/5850 [07:06<31:43,  2.52it/s, loss=0.185]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 47%|████▋     | 2764/5850 [18:55<19:16,  2.67it/s, loss=0.18] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 65%|██████▌   | 3810/5850 [26:01<12:36,  2.70it/s, loss=0.178]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 85%|████████▌ | 4983/5850 [33:59<05:14,  2.76it/s, loss=0.181]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0


 86%|████████▌ | 5015/5850 [34:12<05:06,  2.73it/s, loss=0.182]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 2048.0


100%|██████████| 5850/5850 [39:53<00:00,  2.44it/s, loss=0.183]
100%|██████████| 2915/2915 [02:49<00:00, 17.17it/s, loss=0.181] 
  0%|          | 0/5850 [00:00<?, ?it/s]

Epoch = 6, AUC = 0.9213666306483073
Validation score improved (0.9134988789574884 --> 0.9213666306483073). Saving model!


 75%|███████▍  | 4359/5850 [29:48<08:59,  2.76it/s, loss=0.182]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0


 75%|███████▍  | 4360/5850 [29:49<08:23,  2.96it/s, loss=0.186]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 2048.0


 75%|███████▍  | 4378/5850 [29:56<09:06,  2.69it/s, loss=0.194]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 1024.0


100%|██████████| 5850/5850 [39:54<00:00,  2.44it/s, loss=0.191]
100%|██████████| 2915/2915 [02:48<00:00, 17.33it/s, loss=0.184] 
  0%|          | 0/5850 [00:00<?, ?it/s]

Epoch = 7, AUC = 0.9146572664147694
EarlyStopping counter: 1 out of 5


 81%|████████▏ | 4764/5850 [31:23<07:02,  2.57it/s, loss=0.172]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 47%|████▋     | 2753/5850 [17:52<19:12,  2.69it/s, loss=0.166]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 56%|█████▌    | 3276/5850 [21:16<16:34,  2.59it/s, loss=0.167]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 33%|███▎      | 1921/5850 [12:29<22:38,  2.89it/s, loss=0.172]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 34%|███▍      | 2011/5850 [13:05<24:54,  2.57it/s, loss=0.171]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 2915/2915 [02:41<00:00, 18.07it/s, loss=0.18] 
  0%|          | 0/5850 [00:00<?, ?it/s]

Epoch = 10, AUC = 0.9155608104824994
EarlyStopping counter: 1 out of 5


 15%|█▍        | 851/5850 [05:31<32:58,  2.53it/s, loss=0.17]  IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 71%|███████   | 2063/2915 [01:54<00:46, 18.36it/s, loss=0.103] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 96%|█████████▌| 5614/5850 [36:32<01:21,  2.89it/s, loss=0.144]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 65536.0


100%|█████████▉| 5827/5850 [37:55<00:07,  2.90it/s, loss=0.144]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5850/5850 [38:04<00:00,  2.56it/s, loss=0.144]
 13%|█▎        | 371/2915 [00:20<02:25, 17.45it/s, loss=0.0748]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 68%|██████▊   | 3978/5850 [25:53<10:43,  2.91it/s, loss=0.138]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 65536.0


 76%|███████▌  | 4439/5850 [28:53<08:45,  2.69it/s, loss=0.138]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 83%|████████▎ | 4879/5850 [31:43<06:25,  2.52it/s, loss=0.138]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 61%|██████    | 3566/5850 [23:03<14:35,  2.61it/s, loss=0.134]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

  8%|▊         | 494/5850 [03:12<33:15,  2.68it/s, loss=0.141]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 33%|███▎      | 1913/5850 [12:20<25:23,  2.58it/s, loss=0.137]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 2915/2915 [02:42<00:00, 17.99it/s, loss=0.169] 
  0%|          | 0/5850 [00:00<?, ?it/s]

Epoch = 15, AUC = 0.9291640886930889
EarlyStopping counter: 3 out of 5


 10%|█         | 595/5850 [03:52<32:30,  2.69it/s, loss=0.134] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 12%|█▏        | 693/5850 [04:31<33:35,  2.56it/s, loss=0.132]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 5850/5850 [38:00<00:00,  2.57it/s, loss=0.13]
 67%|██████▋   | 1943/2915 [01:47<00:58, 16.48it/s, loss=0.0986]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 5850/5850 [38:01<00:00,  2.56it/s, loss=0.129]
 10%|▉         | 277/2915 [00:15<02:31, 17.40it/s, loss=0.0783]IOPub message rate exceeded.
The n

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 75%|███████▍  | 4365/5851 [30:06<09:53,  2.50it/s, loss=0.231]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 52%|█████▏    | 3067/5851 [21:02<20:05,  2.31it/s, loss=0.21] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 26%|██▌       | 1500/5851 [10:31<26:55,  2.69it/s, loss=0.206]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 31%|███       | 1792/5851 [12:34<28:27,  2.38it/s, loss=0.207]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 92%|█████████▏| 5367/5851 [37:43<02:59,  2.70it/s, loss=0.199]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 5851/5851 [41:07<00:00,  2.37it/s, loss=0.199]
100%|██████████| 2913/2913 [03:01<00:00, 16.08it/s, loss=0.196] 
  0%|          | 0/5851 [00:00<?, ?it/s]

Epoch = 2, AUC = 0.8975670874120443
Validation score improved (0.8917900126104717 --> 0.8975670874120443). Saving model!


 15%|█▌        | 906/5851 [06:24<34:47,  2.37it/s, loss=0.188]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 94%|█████████▍| 5511/5851 [38:51<02:19,  2.43it/s, loss=0.193]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 94%|█████████▍| 5513/5851 [38:52<02:14,  2.51it/s, loss=0.193]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 5851/5851 [41:16<00:00,  2.36it/s, loss=0.193]
100%|██████████| 2913/2913 [03:02<00:00, 15.95it/s, loss=0.184] 
  0%|          | 0/5851 [00:00<?, ?it/s]

Epoch = 3, AUC = 0.9091645692561605
Validation score improved (0.8975670874120443 --> 0.9091645692561605). Saving model!


 34%|███▍      | 1989/5851 [14:15<24:23,  2.64it/s, loss=0.188]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 35%|███▌      | 2051/5851 [14:42<24:06,  2.63it/s, loss=0.212]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0


100%|██████████| 5851/5851 [41:40<00:00,  2.34it/s, loss=0.2]  
100%|██████████| 2913/2913 [03:06<00:00, 15.65it/s, loss=0.191] 
  0%|          | 0/5851 [00:00<?, ?it/s]

Epoch = 4, AUC = 0.9064725363693582
EarlyStopping counter: 1 out of 5


  2%|▏         | 106/5851 [00:46<36:37,  2.61it/s, loss=0.202] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0


 42%|████▏     | 2440/5851 [17:16<20:57,  2.71it/s, loss=0.187]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0


 82%|████████▏ | 4775/5851 [33:49<06:41,  2.68it/s, loss=0.186]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0


100%|██████████| 5851/5851 [41:27<00:00,  2.35it/s, loss=0.191]
100%|██████████| 2913/2913 [03:03<00:00, 15.85it/s, loss=0.183] 
  0%|          | 0/5851 [00:00<?, ?it/s]

Epoch = 5, AUC = 0.9128659693380505
Validation score improved (0.9091645692561605 --> 0.9128659693380505). Saving model!


 78%|███████▊  | 4537/5851 [32:08<08:16,  2.64it/s, loss=0.18] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 80%|████████  | 4710/5851 [33:21<07:12,  2.64it/s, loss=0.18] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0


 81%|████████  | 4728/5851 [33:28<06:59,  2.67it/s, loss=0.182]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 2048.0


 84%|████████▎ | 4889/5851 [34:37<06:00,  2.67it/s, loss=0.183]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 1024.0


 87%|████████▋ | 5068/5851 [35:53<05:09,  2.53it/s, loss=0.188]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 512.0


 87%|████████▋ | 5071/5851 [35:55<04:49,  2.70it/s, loss=0.192]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 256.0


100%|██████████| 5851/5851 [41:30<00:00,  2.35it/s, loss=0.207]
100%|██████████| 2913/2913 [03:06<00:00, 15.60it/s, loss=0.33] 
  0%|          | 0/5851 [00:00<?, ?it/s]

Epoch = 6, AUC = 0.8354957189789457
EarlyStopping counter: 1 out of 5


100%|██████████| 5851/5851 [40:57<00:00,  2.38it/s, loss=0.199]
100%|██████████| 2913/2913 [03:00<00:00, 16.15it/s, loss=0.189] 
  0%|          | 0/5851 [00:00<?, ?it/s]

Epoch = 7, AUC = 0.9035894329151888
EarlyStopping counter: 2 out of 5


100%|██████████| 5851/5851 [40:51<00:00,  2.39it/s, loss=0.182]
100%|██████████| 2913/2913 [02:59<00:00, 16.24it/s, loss=0.2]   
  0%|          | 0/5851 [00:00<?, ?it/s]

Epoch = 8, AUC = 0.8923652678480531
EarlyStopping counter: 3 out of 5


 94%|█████████▍| 5519/5851 [38:35<02:01,  2.73it/s, loss=0.175]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 65536.0


100%|██████████| 5851/5851 [40:55<00:00,  2.38it/s, loss=0.175]
100%|██████████| 2913/2913 [03:00<00:00, 16.17it/s, loss=0.2]   
  0%|          | 0/5851 [00:00<?, ?it/s]

Epoch = 9, AUC = 0.9050323082959366
EarlyStopping counter: 4 out of 5


  1%|          | 63/5851 [00:26<38:19,  2.52it/s, loss=0.196] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 41%|████      | 2406/5851 [16:38<23:03,  2.49it/s, loss=0.158]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 90%|█████████ | 5277/5851 [36:43<04:28,  2.14it/s, loss=0.159]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5851/5851 [40:52<00:00,  2.39it/s, loss=0.159]
100%|██████████| 2913/2913 [03:08<00:00, 15.47it/s, loss=0.181] 
  0%|          | 0/5851 [00:00<?, ?it/s]

Epoch = 10, AUC = 0.9149129881170269
Validation score improved (0.9128659693380505 --> 0.9149129881170269). Saving model!


 59%|█████▊    | 3428/5851 [24:40<14:43,  2.74it/s, loss=0.15]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 65536.0


 70%|██████▉   | 4080/5851 [29:25<11:36,  2.54it/s, loss=0.149]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5851/5851 [42:17<00:00,  2.31it/s, loss=0.152]
100%|██████████| 2913/2913 [03:10<00:00, 15.27it/s, loss=0.18]  
  0%|          | 0/5851 [00:00<?, ?it/s]

Epoch = 11, AUC = 0.9175421358438353
Validation score improved (0.9149129881170269 --> 0.9175421358438353). Saving model!


  1%|▏         | 84/5851 [00:37<39:54,  2.41it/s, loss=0.145]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 73%|███████▎  | 4286/5851 [30:52<10:25,  2.50it/s, loss=0.15]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5851/5851 [42:05<00:00,  2.32it/s, loss=0.149]
100%|██████████| 2913/2913 [03:11<00:00, 15.24it/s, loss=0.18]  
  0%|          | 0/5851 [00:00<?, ?it/s]

Epoch = 12, AUC = 0.9163150856761539
EarlyStopping counter: 1 out of 5


 17%|█▋        | 1009/5851 [07:16<32:55,  2.45it/s, loss=0.14]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 56%|█████▌    | 3251/5851 [23:40<17:01,  2.54it/s, loss=0.147]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 91%|█████████ | 5336/5851 [38:52<03:20,  2.57it/s, loss=0.144]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5851/5851 [42:33<00:00,  2.29it/s, loss=0.145]
100%|██████████| 2913/2913 [03:10<00:00, 15.27it/s, loss=0.177] 
  0%|          | 0/5851 [00:00<?, ?it/s]

Epoch = 13, AUC = 0.9191589904043005
Validation score improved (0.9175421358438353 --> 0.9191589904043005). Saving model!


 45%|████▍     | 2622/5851 [19:17<21:43,  2.48it/s, loss=0.144] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 84%|████████▍ | 4918/5851 [36:20<06:25,  2.42it/s, loss=0.144]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5851/5851 [43:05<00:00,  2.26it/s, loss=0.144]
100%|██████████| 2913/2913 [03:09<00:00, 15.34it/s, loss=0.18]  
  0%|          | 0/5851 [00:00<?, ?it/s]

Epoch = 14, AUC = 0.9192580554310856
EarlyStopping counter: 1 out of 5


 36%|███▌      | 2086/5851 [15:15<25:11,  2.49it/s, loss=0.142]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 70%|███████   | 4108/5851 [30:10<11:20,  2.56it/s, loss=0.141]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 5851/5851 [44:34<00:00,  2.19it/s, loss=0.141]  
100%|██████████| 2913/2913 [03:02<00:00, 15.92it/s, loss=0.181] 
  0%|          | 0/5851 [00:00<?, ?it/s]

Epoch = 15, AUC = 0.9166438162244828
EarlyStopping counter: 2 out of 5


100%|██████████| 5852/5852 [44:15<00:00,  2.20it/s, loss=0.179]
100%|██████████| 2912/2912 [03:21<00:00, 14.48it/s, loss=0.18]  
  0%|          | 0/5852 [00:00<?, ?it/s]

Epoch = 6, AUC = 0.9167735022844141
Validation score improved (0.9074943029278484 --> 0.9167735022844141). Saving model!


 17%|█▋        | 1007/5852 [07:34<31:23,  2.57it/s, loss=0.171]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 24%|██▍       | 1419/5852 [10:39<29:22,  2.51it/s, loss=0.174]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 93%|█████████▎| 5431/5852 [40:49<02:59,  2.34it/s, loss=0.174]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 5852/5852 [43:49<00:00,  2.23it/s, loss=0.175]
100%|██████████| 2912/2912 [03:14<00:00, 14.97it/s, loss=0.188] 
  0%|          | 0/5852 [00:00<?, ?it/s]

Epoch = 7, AUC = 0.906174760480937
EarlyStopping counter: 1 out of 5


 28%|██▊       | 1667/5852 [12:28<28:22,  2.46it/s, loss=0.167]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 29%|██▉       | 1710/5852 [12:47<27:07,  2.54it/s, loss=0.179]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 34%|███▍      | 2010/5852 [15:02<25:24,  2.52it/s, loss=0.175]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 4096.0


 36%|███▌      | 2095/5852 [15:41<24:48,  2.52it/s, loss=0.175]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 2048.0


 41%|████      | 2392/5852 [17:54<22:59,  2.51it/s, loss=0.188]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 1024.0


 49%|████▉     | 2886/5852 [21:37<19:27,  2.54it/s, loss=0.188]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 512.0


100%|██████████| 5852/5852 [43:52<00:00,  2.22it/s, loss=0.198]
100%|██████████| 2912/2912 [03:22<00:00, 14.38it/s, loss=0.19]  
  0%|          | 0/5852 [00:00<?, ?it/s]

Epoch = 8, AUC = 0.9054058884639873
EarlyStopping counter: 2 out of 5


 51%|█████     | 2960/5852 [22:14<21:33,  2.24it/s, loss=0.187]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 27%|██▋       | 1594/5852 [12:00<32:28,  2.19it/s, loss=0.17] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 5852/5852 [44:01<00:00,  2.22it/s, loss=0.168]
100%|██████████| 2912/2912 [03:22<00:00, 14.40it/s, loss=0.181] 
  0%|          | 0/5852 [00:00<?, ?it/s]

Epoch = 10, AUC = 0.9143240870721144
EarlyStopping counter: 4 out of 5


100%|██████████| 5852/5852 [42:37<00:00,  2.29it/s, loss=0.153]
  0%|          | 0/2912 [00:00<?, ?it/s]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 2912/2912 [03:15<00:00, 14.92it/s, loss=0.177] 

Epoch = 11, AUC = 0.9167267248537565
EarlyStopping counter: 5 out of 5
Early stopping





In [12]:
# from IPython.display import FileLink, FileLinks
# FileLinks('../models/')