In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

In [2]:
import torch
import albumentations

import numpy as np
import pandas as pd

import torch.nn as nn
from sklearn import metrics
from sklearn import model_selection
from torch.nn import functional as F
from tqdm import tqdm
# from wtfml.utils import EarlyStopping
# from wtfml.engine import Engine
# from wtfml.data_loaders.image import ClassificationLoader
from efficientnet_pytorch import EfficientNet
import pretrainedmodels

from albumentations.pytorch import ToTensor
from torchvision import transforms

### Porting stuff from wtfml for our use

In [3]:
import torch

import numpy as np

from PIL import Image
from PIL import ImageFile


# Ask PIL to decode image files whose data is cut short instead of raising,
# so one damaged file does not abort a whole data-loading pass.
ImageFile.LOAD_TRUNCATED_IMAGES = True


class ClassificationLoader:
    """Map-style dataset that reads images from disk for classification.

    Args:
        image_paths: sequence of paths, one per sample.
        targets: sequence of labels, indexable in step with ``image_paths``.
        resize: ``None`` to keep the stored size, otherwise a 2-item sequence.
            Element 1 is passed to PIL as the width and element 0 as the
            height, i.e. the expected order is ``(height, width)``.
        augmentations: optional callable invoked as ``augmentations(image=arr)``
            and expected to return a mapping with an ``"image"`` entry
            (the calling convention used by albumentations).
    """

    def __init__(self, image_paths, targets, resize, augmentations=None):
        self.image_paths = image_paths
        self.targets = targets
        self.resize = resize
        self.augmentations = augmentations

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load, optionally resize/augment, and tensorise sample ``item``.

        Returns:
            dict with ``"image"`` (float tensor, channels first) and
            ``"targets"`` (long tensor).
        """
        # The context manager releases the file handle promptly; convert()
        # forces the pixel data to be read and guarantees three channels, so
        # grayscale / RGBA / palette files no longer break the transpose below.
        with Image.open(self.image_paths[item]) as handle:
            image = handle.convert("RGB")
        targets = self.targets[item]
        if self.resize is not None:
            # PIL expects (width, height).
            image = image.resize(
                (self.resize[1], self.resize[0]), resample=Image.BILINEAR
            )
        image = np.array(image)
        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]
        # HWC -> CHW, the layout the convolutional model consumes.
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        return {
            "image": torch.tensor(image, dtype=torch.float),
            "targets": torch.tensor(targets, dtype=torch.long),
        }


In [4]:
class AverageMeter:
    """
    Running weighted mean: remembers the latest value, the weighted total,
    the total weight and the resulting average.
    """

    def __init__(self):
        # A new meter starts out in the cleared state.
        self.reset()

    def reset(self):
        """Clear every statistic back to zero."""
        self.val, self.avg, self.sum, self.count = 0, 0, 0, 0

    def update(self, val, n=1):
        """Record ``val`` observed ``n`` times and refresh the average."""
        self.val = val
        self.sum = self.sum + val * n
        self.count = self.count + n
        self.avg = self.sum / self.count

In [5]:
import torch
import numpy as np


class EarlyStopping:
    """Stop training when a monitored score stops improving, checkpointing the best model.

    Args:
        patience: number of consecutive non-improving calls after which
            ``early_stop`` is set to True.
        mode: ``"min"`` if a lower score is better; any other value is treated
            as "higher is better".
        delta: minimum increase (after sign normalisation) over the best score
            that counts as an improvement.

    Attributes:
        counter: consecutive non-improving calls seen so far.
        best_score: best sign-normalised score (negated in ``"min"`` mode).
        early_stop: True once ``counter`` has reached ``patience``.
        val_score: raw score passed to the most recent ``save_checkpoint``.
    """

    def __init__(self, patience=7, mode="max", delta=0.0001):
        self.patience = patience
        self.counter = 0
        self.mode = mode
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        # np.inf (lower case) is the spelling that survives NumPy 2.0;
        # the np.Inf alias was removed there.
        if self.mode == "min":
            self.val_score = np.inf
        else:
            self.val_score = -np.inf

    def __call__(self, epoch_score, model, model_path):
        """Register one epoch's score; checkpoint on improvement, count otherwise."""
        # Normalise so that "larger is better" regardless of mode.
        if self.mode == "min":
            score = -1.0 * epoch_score
        else:
            score = np.copy(epoch_score)

        if np.isnan(score):
            # Every comparison with NaN is False, so a NaN score used to land
            # in the "improved" branch and poison best_score for the rest of
            # the run. Treat it as a non-improving epoch instead.
            improved = False
        elif self.best_score is None:
            improved = True
        else:
            improved = bool(score >= self.best_score + self.delta)

        if improved:
            self.best_score = score
            self.save_checkpoint(epoch_score, model, model_path)
            self.counter = 0
        else:
            self.counter += 1
            print(
                "EarlyStopping counter: {} out of {}".format(
                    self.counter, self.patience
                )
            )
            if self.counter >= self.patience:
                self.early_stop = True

    def save_checkpoint(self, epoch_score, model, model_path):
        """Write ``model.state_dict()`` to ``model_path`` unless the score is inf/NaN."""
        # Membership tests against [..., np.nan] never match a computed NaN
        # (NaN != NaN), so use an explicit finiteness check.
        if np.isfinite(epoch_score):
            print(
                "Validation score improved ({} --> {}). Saving model!".format(
                    self.val_score, epoch_score
                )
            )
            torch.save(model.state_dict(), model_path)
        self.val_score = epoch_score

In [6]:
try:
    from apex import amp
    _apex_available = True
except ImportError:
    _apex_available = False

class Engine:
    """Stateless train / evaluate / predict loops.

    Every loop expects ``data_loader`` to yield dicts of tensors and calls the
    model as ``model(**batch)``, which must return ``(predictions, loss)``.
    """

    @staticmethod
    def train(
        data_loader,
        model,
        optimizer,
        device,
        scheduler=None,
        accumulation_steps=1,
        use_tpu=False,
        fp16=False,
    ):
        """Run one training epoch and return the average loss.

        Args:
            data_loader: iterable of dict batches; must expose ``batch_size``
                and ``__len__``.
            model: module returning ``(predictions, loss)``.
            optimizer: optimizer over ``model``'s parameters.
            device: device every batch tensor is moved to.
            scheduler: optional LR scheduler, stepped after every optimizer step.
            accumulation_steps: number of batches whose gradients are summed
                before one optimizer step (forced to 1 when ``fp16`` is set;
                not applied on the TPU path, which steps every batch).
            use_tpu: step through ``torch_xla`` instead of ``optimizer.step()``.
            fp16: scale the loss through apex ``amp`` before backward.

        Raises:
            Exception: if TPU use is requested without ``torch_xla``, fp16 is
                requested without apex, or both TPU and fp16 are requested.
        """
        xm = None
        if use_tpu:
            # The XLA helper module was referenced but never imported anywhere
            # in this notebook; import it lazily so non-TPU runs do not need it.
            try:
                import torch_xla.core.xla_model as xm
            except ImportError:
                raise Exception(
                    "You want to use TPUs but you dont have pytorch_xla installed"
                )
        if fp16 and not _apex_available:
            raise Exception("You want to use fp16 but you dont have apex installed")
        if fp16 and use_tpu:
            raise Exception("Apex fp16 is not available when using TPUs")
        if fp16:
            accumulation_steps = 1
        losses = AverageMeter()
        model.train()
        # Start from clean gradients, whatever the accumulation setting.
        optimizer.zero_grad()
        tk0 = tqdm(data_loader, total=len(data_loader), disable=use_tpu)
        for b_idx, data in enumerate(tk0):
            for key, value in data.items():
                data[key] = value.to(device)
            _, loss = model(**data)

            if not use_tpu:
                with torch.set_grad_enabled(True):
                    if fp16:
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()
                    if (b_idx + 1) % accumulation_steps == 0:
                        optimizer.step()
                        if scheduler is not None:
                            scheduler.step()
                        # Always clear after a step. Skipping this for the very
                        # first batch made the second step use the summed
                        # gradients of batches 0 and 1.
                        optimizer.zero_grad()
            else:
                loss.backward()
                xm.optimizer_step(optimizer)
                if scheduler is not None:
                    scheduler.step()
                optimizer.zero_grad()

            # Weighted by the loader's nominal batch size (a short final batch
            # is therefore slightly over-weighted in the reported average).
            losses.update(loss.item(), data_loader.batch_size)
            tk0.set_postfix(loss=losses.avg)
        return losses.avg

    @staticmethod
    def evaluate(data_loader, model, device, use_tpu=False):
        """Run the model without gradients over ``data_loader``.

        Returns:
            ``(predictions, average_loss)`` where ``predictions`` is a list of
            per-batch prediction tensors moved to the CPU.
        """
        losses = AverageMeter()
        final_predictions = []
        model.eval()
        with torch.no_grad():
            tk0 = tqdm(data_loader, total=len(data_loader), disable=use_tpu)
            for data in tk0:
                for key, value in data.items():
                    data[key] = value.to(device)
                predictions, loss = model(**data)
                predictions = predictions.cpu()
                losses.update(loss.item(), data_loader.batch_size)
                final_predictions.append(predictions)
                tk0.set_postfix(loss=losses.avg)
        return final_predictions, losses.avg

    @staticmethod
    def predict(data_loader, model, device, use_tpu=False):
        """Return the list of per-batch CPU prediction tensors; the loss is discarded."""
        model.eval()
        final_predictions = []
        with torch.no_grad():
            tk0 = tqdm(data_loader, total=len(data_loader), disable=use_tpu)
            for data in tk0:
                for key, value in data.items():
                    data[key] = value.to(device)
                predictions, _ = model(**data)
                predictions = predictions.cpu()
                final_predictions.append(predictions)
        return final_predictions

In [7]:
class Net(nn.Module):
    """Wrap a backbone exposing an ``_fc`` head and swap that head for a single-logit layer."""

    def __init__(self, arch):
        super().__init__()
        head_inputs = arch._fc.in_features
        self.arch = arch
        # Replace the backbone's classifier with a one-output linear layer.
        self.arch._fc = nn.Linear(in_features=head_inputs, out_features=1, bias=True)

    def forward(self, image, targets):
        """
        Return ``(logits, loss)`` for a batch.

        The logits are raw (no sigmoid): BCEWithLogitsLoss applies the sigmoid
        itself while computing the loss.
        """
        # Unpacking four dimensions rejects anything that is not a 4-D batch.
        _, _, _, _ = image.shape

        ### https://github.com/clovaai/CutMix-PyTorch/blob/master/train.py
        logits = self.arch(image)
        labels = targets.view(-1, 1).float()
        loss = nn.BCEWithLogitsLoss()(logits, labels)
        return logits, loss

In [8]:
# arch = EfficientNet.from_pretrained('efficientnet-b3')
# arch
# arch._fc.in_features

In [9]:
def train(fold,bs,epochs,fp16,sz,arch='efficientnet-b3',debug=False):
    """Train one cross-validation fold and checkpoint on validation-AUC improvement.

    Args:
        fold: value of the ``fold`` column held out for validation; all other
            rows are used for training.
        bs: training batch size; validation uses half of it (at least 1).
        epochs: maximum number of epochs (early stopping may end sooner).
        fp16: enable apex ``amp`` mixed precision (opt level O1).
        sz: side length images are resized to (square), or ``None`` to keep the
            stored size.
        arch: EfficientNet variant name passed to ``EfficientNet.from_pretrained``.
        debug: if True, use only the first 250 training and validation rows.

    Raises:
        Exception: if ``fp16`` is requested but apex is not installed.
    """
    import os

    # Fail before any model/data work if mixed precision cannot be honoured.
    if fp16 and not _apex_available:
        raise Exception("You want to use fp16 but you dont have apex installed")

    resize = (sz, sz) if sz is not None else None
    # Size tag used only in the checkpoint file name; 512 stands for "not resized".
    size_tag = sz if sz is not None else 512
    arch_name = arch

    training_data_path = '../input/512x512-dataset-melanoma/512x512-dataset-melanoma/'
    df = pd.read_csv('../input/folds_08062020.csv')
    device = "cuda" if torch.cuda.is_available() else "cpu"
    train_bs = bs
    # bs // 2 is 0 for bs == 1, which DataLoader rejects.
    valid_bs = max(1, bs // 2)

    # torch.save does not create missing directories.
    os.makedirs("../models", exist_ok=True)

    df_train = df[df.fold != fold].reset_index(drop=True)
    df_valid = df[df.fold == fold].reset_index(drop=True)

    backbone = EfficientNet.from_pretrained(arch_name)
    model = Net(arch=backbone)  # New model for each fold
    model = model.to(device)

    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    train_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
            albumentations.CoarseDropout(),
            albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=30),
            albumentations.Flip(p=0.5)
        ]
    )

    valid_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
        ]
    )

    # A slice bound of None keeps every row, so one code path serves both modes.
    row_limit = 250 if debug else None

    def _paths_and_targets(frame):
        """Return (image file paths, target array) for the rows of ``frame``."""
        image_ids = frame.image_id.values.tolist()[:row_limit]
        paths = [os.path.join(training_data_path, i + ".jpg") for i in image_ids]
        return paths, frame.target.values[:row_limit]

    train_images, train_targets = _paths_and_targets(df_train)
    valid_images, valid_targets = _paths_and_targets(df_valid)

    train_dataset = ClassificationLoader(
        image_paths=train_images,
        targets=train_targets,
        resize=resize,
        augmentations=train_aug,
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=train_bs, shuffle=True, num_workers=4
    )

    valid_dataset = ClassificationLoader(
        image_paths=valid_images,
        targets=valid_targets,
        resize=resize,
        augmentations=valid_aug,
    )

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_bs, shuffle=False, num_workers=4
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    # Stepped once per epoch on the validation AUC (higher is better).
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=3,
        threshold=0.001,
        mode="max"
    )

    if fp16:
        # Only patch the model/optimizer for mixed precision when it was asked
        # for; previously this ran unconditionally and raised NameError when
        # apex was not installed.
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
    es = EarlyStopping(patience=5, mode="max")

    for epoch in range(epochs):
        train_loss = Engine.train(train_loader, model, optimizer, device=device,fp16=fp16)
        predictions, valid_loss = Engine.evaluate(
            valid_loader, model, device=device
        )
        # List of per-batch tensors -> flat vector aligned with valid_targets.
        predictions = np.vstack((predictions)).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)

        es(auc, model, model_path= "../models/model_arch_{}_sz_{}_fold_{}_epoch_{}_auc_{}.bin".format(arch_name,size_tag,fold,epoch,round(auc*100,2)))
        if es.early_stop:
            print("Early stopping")
            break

In [10]:
# Run configuration shared by every fold.
e = 20
debug= False
bs = 4
# Train folds 0-4 one after another with identical settings
# (fp16 enabled, images resized to 1024x1024).
for fold in range(5):
    train(fold,bs,e,True,1024,'efficientnet-b3',debug=debug)

Loaded pretrained weights for efficientnet-b3


  0%|          | 0/11662 [00:00<?, ?it/s]

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


  0%|          | 38/11662 [00:23<2:15:51,  1.43it/s, loss=0.447]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


  1%|          | 73/11662 [00:49<2:16:45,  1.41it/s, loss=0.344]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 14%|█▍        | 1630/11662 [19:52<2:01:55,  1.37it/s, loss=0.247]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 71%|███████   | 8230/11662 [1:39:45<31:16,  1.83it/s, loss=0.23]   IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 58%|█████▊    | 3404/5905 [03:36<02:05, 19.87it/s, loss=0.076] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit,

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 40%|████      | 4676/11662 [48:09<1:02:46,  1.85it/s, loss=0.204]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 91%|█████████ | 10615/11662 [1:41:40<08:48,  1.98it/s, loss=0.198]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 94%|█████████▍| 10995/11662 [1:44:58<05:45,  1.93it/s, loss=0.199]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

  9%|▉         | 1088/11662 [09:24<1:28:27,  1.99it/s, loss=0.19] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 16%|█▌        | 1874/11662 [16:11<1:23:56,  1.94it/s, loss=0.191]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 65%|██████▍   | 7526/11662 [1:04:56<34:16,  2.01it/s, loss=0.188]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 69%|██████▉   | 8072/11662 [1:09:39<29:44,  2.01it/s, loss=0.189]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 72%|███████▏  | 8439/11662 [1:12:49<27:44,  1.94it/s, loss=0.189]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 86%|████████▌ | 5064/5905 [04:10<00:42, 19.87it/s, loss=0.15]  IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 48%|████▊     | 5552/11662 [47:55<51:18,  1.98it/s, loss=0.177]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 52%|█████▏    | 6007/11662 [51:51<48:50,  1.93it/s, loss=0.177]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 11662/11662 [1:40:40<00:00,  1.93it/s, loss=0.176]
 21%|██        | 1254/5905 [01:02<03:48, 20.36it/s, loss=0.0684]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 29%|██▉       | 3389/11662 [29:14<1:10:55,  1.94it/s, loss=0.173]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 85%|████████▌ | 9961/11662 [1:25:55<14:36,  1.94it/s, loss=0.17] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 5905/5905 [04:53<00:00, 20.15it/s, loss=0.182]
  0%|          | 0/11662 [00:00<?, ?it/s]

Epoch = 4, AUC = 0.9140901139459687
EarlyStopping counter: 1 out of 5


  7%|▋         | 815/11662 [07:02<1:33:16,  1.94it/s, loss=0.15] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 36%|███▌      | 4202/11662 [36:16<1:02:12,  2.00it/s, loss=0.157]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 54%|█████▎    | 6267/11662 [54:06<44:50,  2.00it/s, loss=0.16]   

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 88%|████████▊ | 10310/11662 [1:34:33<11:15,  2.00it/s, loss=0.162]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 11662/11662 [1:49:56<00:00,  1.77it/s, loss=0.162]
100%|██████████| 5905/5905 [06:43<00:00, 14.63it/s, loss=0.188] 
  0%|          | 0/11662 [00:00<?, ?it/s]

Epoch = 5, AUC = 0.9203690653512073
EarlyStopping counter: 2 out of 5


  4%|▍         | 500/11662 [06:08<2:13:03,  1.40it/s, loss=0.157]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 39%|███▉      | 4562/11662 [55:14<1:23:54,  1.41it/s, loss=0.154]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 45%|████▌     | 5305/11662 [1:04:02<1:14:45,  1.42it/s, loss=0.156]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 50%|████▉     | 5807/11662 [1:10:11<1:10:01,  1.39it/s, loss=0.155]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 86%|████████▌ | 10040/11662 [2:01:58<19:33,  1.38it/s, loss=0.155] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11662/11662 [2:21:41<00:00,  1.37it/s, loss=0.156]
100%|██████████| 5905/5905 [06:50<00:00, 14.39it/s, loss=0.203] 
  0%|          | 0/11662 [00:00<?, ?it/s]

Epoch = 6, AUC = 0.9164863515757218
EarlyStopping counter: 3 out of 5


 17%|█▋        | 1956/11662 [23:51<1:55:12,  1.40it/s, loss=0.153]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 34%|███▍      | 3978/11662 [48:54<1:32:01,  1.39it/s, loss=0.15] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 55%|█████▍    | 6371/11662 [1:18:13<1:04:15,  1.37it/s, loss=0.148]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 72%|███████▏  | 8433/11662 [1:42:10<38:54,  1.38it/s, loss=0.148]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 92%|█████████▏| 10687/11662 [2:09:26<11:48,  1.38it/s, loss=0.146]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11662/11662 [2:21:15<00:00,  1.38it/s, loss=0.147]
100%|██████████| 5905/5905 [06:54<00:00, 14.26it/s, loss=0.193] 
  0%|          | 0/11662 [00:00<?, ?it/s]

Epoch = 7, AUC = 0.9113278184409885
EarlyStopping counter: 4 out of 5


 13%|█▎        | 1464/11662 [18:22<2:04:08,  1.37it/s, loss=0.121]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 31%|███       | 3573/11662 [44:30<1:37:49,  1.38it/s, loss=0.124]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 55%|█████▌    | 6420/11662 [1:19:20<45:13,  1.93it/s, loss=0.127]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 76%|███████▌  | 8881/11662 [1:48:17<33:37,  1.38it/s, loss=0.125]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 94%|█████████▍| 10944/11662 [2:12:29<08:26,  1.42it/s, loss=0.124]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11662/11662 [2:20:47<00:00,  1.38it/s, loss=0.123]
100%|██████████| 5905/5905 [06:43<00:00, 14.63it/s, loss=0.194] 


Epoch = 8, AUC = 0.9194090309698844
EarlyStopping counter: 5 out of 5
Early stopping


  0%|          | 0/11692 [00:00<?, ?it/s]

Loaded pretrained weights for efficientnet-b3
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


  1%|          | 90/11692 [01:09<2:22:43,  1.35it/s, loss=0.362]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 19%|█▊        | 2182/11692 [27:41<1:55:15,  1.38it/s, loss=0.245]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 21%|██        | 2472/11692 [31:00<1:52:31,  1.37it/s, loss=0.244]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 26%|██▌       | 3040/11692 [38:09<1:43:47,  1.39it/s, loss=0.24] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 60%|██████    | 7066/11692 [1:27:21<55:28,  1.39it/s, loss=0.228]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 83%|████████▎ | 9646/11692 [1:59:03<24:13,  1.41it/s, loss=0.224]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11692/11692 [2:23:27<00:00,  1.36it/s, loss=0.221]
100%|██████████| 5846/5846 [06:36<00:00, 14.75it/s, loss=0.206] 
  0%|          | 0/11692 [00:00<?, ?it/s]

Epoch = 0, AUC = 0.8863162203010424
Validation score improved (-inf --> 0.8863162203010424). Saving model!


  0%|          | 5/11692 [00:04<2:44:28,  1.18it/s, loss=0.315] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 35%|███▍      | 4084/11692 [45:57<1:04:20,  1.97it/s, loss=0.203]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 43%|████▎     | 5004/11692 [54:03<56:29,  1.97it/s, loss=0.202]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 60%|██████    | 7016/11692 [1:11:49<39:33,  1.97it/s, loss=0.201]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 61%|██████▏   | 7173/11692 [1:13:12<38:12,  1.97it/s, loss=0.201]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 11692/11692 [2:07:30<00:00,  1.53it/s, loss=0.199]
100%|██████████| 5846/5846 [06:35<00:00, 14.78it/s, loss=0.211] 
  0%|          | 0/11692 [00:00<?, ?it/s]

Epoch = 1, AUC = 0.8869651786796297
Validation score improved (0.8863162203010424 --> 0.8869651786796297). Saving model!


  7%|▋         | 798/11692 [09:38<2:07:54,  1.42it/s, loss=0.199]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 26%|██▌       | 3046/11692 [36:45<1:40:56,  1.43it/s, loss=0.189]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 60%|██████    | 7072/11692 [1:25:13<54:14,  1.42it/s, loss=0.189]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 62%|██████▏   | 7216/11692 [1:26:56<52:58,  1.41it/s, loss=0.188]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 85%|████████▌ | 9980/11692 [1:59:38<19:45,  1.44it/s, loss=0.186]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 98%|█████████▊| 11515/11692 [2:18:00<02:07,  1.39it/s, loss=0.186]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 23%|██▎       | 2744/11692 [24:10<1:18:37,  1.90it/s, loss=0.181]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 78%|███████▊  | 9169/11692 [1:23:14<22:16,  1.89it/s, loss=0.176]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit

Epoch = 3, AUC = 0.8906801235632118
EarlyStopping counter: 1 out of 5


  1%|          | 85/11692 [00:45<1:41:03,  1.91it/s, loss=0.212]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 59%|█████▉    | 6948/11692 [1:00:38<41:24,  1.91it/s, loss=0.169]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 43%|████▎     | 2541/5846 [02:02<02:39, 20.68it/s, loss=0.07]  IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 40%|███▉      | 4652/11692 [40:29<1:00:53,  1.93it/s, loss=0.159]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 99%|█████████▊| 11531/11692 [1:40:21<01:23,  1.92it/s, loss=0.158]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 17%|█▋        | 1989/11692 [17:22<1:21:14,  1.99it/s, loss=0.148]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 23%|██▎       | 2734/11692 [23:53<1:18:04,  1.91it/s, loss=0.147]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 82%|████████▏ | 9544/11692 [1:23:18<18:45,  1.91it/s, loss=0.151]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 5846/5846 [04:40<00:00, 20.83it/s, loss=0.18] 
  0%|          | 0/11692 [00:00<?, ?it/s]

Epoch = 6, AUC = 0.9186949493194191
Validation score improved (0.9144563927499216 --> 0.9186949493194191). Saving model!


  6%|▋         | 733/11692 [06:26<1:35:22,  1.92it/s, loss=0.146]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 55%|█████▍    | 6392/11692 [1:05:44<1:01:52,  1.43it/s, loss=0.147]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 72%|███████▏  | 8400/11692 [1:24:10<27:39,  1.98it/s, loss=0.147]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 90%|█████████ | 10524/11692 [1:47:36<13:47,  1.41it/s, loss=0.145]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 93%|█████████▎| 10817/11692 [1:50:38<07:43,  1.89it/s, loss=0.144]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 5846/5846 [06:18<00:00, 15.46it/s, loss=0.177]
  0%|          | 0/11692 [00:00<?, ?it/s]

Epoch = 7, AUC = 0.9194986589595905
Validation score improved (0.9186949493194191 --> 0.9194986589595905). Saving model!


  8%|▊         | 900/11692 [10:51<2:07:56,  1.41it/s, loss=0.138]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 10%|▉         | 1118/11692 [13:29<2:04:30,  1.42it/s, loss=0.134]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 11%|█         | 1230/11692 [14:51<2:05:51,  1.39it/s, loss=0.137]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 40%|████      | 4700/11692 [56:35<1:23:07,  1.40it/s, loss=0.142]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 77%|███████▋  | 9025/11692 [1:48:51<31:33,  1.41it/s, loss=0.141]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|█████████▉| 11661/11692 [2:20:12<00:22,  1.41it/s, loss=0.139]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11692/11692 [2:20:35<00:00,  1.39it/s, loss=0.139]
100%|██████████| 5846/5846 [06:46<00:00, 14.39it/s, loss=0.181] 
  0%|          | 0/11692 [00:00<?, ?it/s]

Epoch = 8, AUC = 0.9259494634695106
Validation score improved (0.9194986589595905 --> 0.9259494634695106). Saving model!


 17%|█▋        | 2008/11692 [24:18<1:54:40,  1.41it/s, loss=0.135]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 35%|███▍      | 4042/11692 [49:27<1:31:49,  1.39it/s, loss=0.134]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 52%|█████▏    | 6061/11692 [1:13:59<1:08:50,  1.36it/s, loss=0.135]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 56%|█████▌    | 6546/11692 [1:19:36<1:01:22,  1.40it/s, loss=0.133]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 74%|███████▍  | 8705/11692 [1:45:07<36:00,  1.38it/s, loss=0.134]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 11692/11692 [2:22:09<00:00,  1.37it/s, loss=0.134]
100%|██████████| 5846/5846 [06:49<00:00, 14.28it/s, loss=0.196] 
  0%|          | 0/11692 [00:00<?, ?it/s]

Epoch = 9, AUC = 0.9226807089107734
EarlyStopping counter: 1 out of 5


  9%|▉         | 1089/11692 [13:03<2:08:47,  1.37it/s, loss=0.118]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 28%|██▊       | 3223/11692 [39:18<1:41:14,  1.39it/s, loss=0.13] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 45%|████▌     | 5288/11692 [1:04:34<1:16:02,  1.40it/s, loss=0.126]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 65%|██████▍   | 7551/11692 [1:32:18<50:07,  1.38it/s, loss=0.126]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 81%|████████▏ | 9526/11692 [1:56:26<25:36,  1.41it/s, loss=0.126]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 11692/11692 [2:22:53<00:00,  1.36it/s, loss=0.127]
100%|██████████| 5846/5846 [06:39<00:00, 14.64it/s, loss=0.192] 
  0%|          | 0/11692 [00:00<?, ?it/s]

Epoch = 10, AUC = 0.9192450388364388
EarlyStopping counter: 2 out of 5


 17%|█▋        | 1999/11692 [24:23<1:53:50,  1.42it/s, loss=0.12] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 35%|███▌      | 4145/11692 [49:52<1:04:01,  1.96it/s, loss=0.122]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 53%|█████▎    | 6249/11692 [1:13:51<45:45,  1.98it/s, loss=0.123]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 72%|███████▏  | 8375/11692 [1:39:24<39:25,  1.40it/s, loss=0.123]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 73%|███████▎  | 8589/11692 [1:42:01<37:06,  1.39it/s, loss=0.123]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 11692/11692 [2:19:04<00:00,  1.40it/s, loss=0.122]
  8%|▊         | 493/5846 [00:34<06:10, 14.45it/s, loss=0.0737]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

  8%|▊         | 912/11692 [11:10<2:10:47,  1.37it/s, loss=0.106]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 16%|█▋        | 1914/11692 [23:26<1:56:51,  1.39it/s, loss=0.109]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 51%|█████     | 5950/11692 [1:12:50<1:08:29,  1.40it/s, loss=0.116]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 64%|██████▍   | 7529/11692 [1:27:15<35:14,  1.97it/s, loss=0.114]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 11692/11692 [2:15:45<00:00,  1.44it/s, loss=0.115]
100%|██████████| 5846/5846 [06:36<00:00, 14.75it/s, loss=0.223] 
  0%|          | 0/11692 [00:00<?, ?it/s]

Epoch = 12, AUC = 0.9271195183687095
Validation score improved (0.9259494634695106 --> 0.9271195183687095). Saving model!


  3%|▎         | 409/11692 [04:57<2:12:17,  1.42it/s, loss=0.115]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 21%|██        | 2421/11692 [28:49<1:49:29,  1.41it/s, loss=0.105]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 29%|██▊       | 3350/11692 [40:02<1:37:24,  1.43it/s, loss=0.107]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 63%|██████▎   | 7421/11692 [1:29:03<51:02,  1.39it/s, loss=0.109]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 84%|████████▍ | 9835/11692 [1:58:30<21:50,  1.42it/s, loss=0.11] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11692/11692 [2:21:06<00:00,  1.38it/s, loss=0.11] 
100%|██████████| 5846/5846 [06:38<00:00, 14.67it/s, loss=0.206] 
  0%|          | 0/11692 [00:00<?, ?it/s]

Epoch = 13, AUC = 0.921265265338507
EarlyStopping counter: 1 out of 5


  3%|▎         | 309/11692 [03:45<2:14:43,  1.41it/s, loss=0.0982]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 20%|█▉        | 2318/11692 [27:45<1:49:35,  1.43it/s, loss=0.105]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 28%|██▊       | 3252/11692 [39:00<1:38:24,  1.43it/s, loss=0.104]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 63%|██████▎   | 7327/11692 [1:25:41<37:32,  1.94it/s, loss=0.105]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 80%|████████  | 9363/11692 [1:43:52<20:00,  1.94it/s, loss=0.104]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 99%|█████████▉| 11601/11692 [2:03:50<00:46,  1.96it/s, loss=0.106]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11692/11692 [2:04:38<00:00,  1.56it/s, loss=0.106]
 26%|██▌       | 1495/5846 [01:19<03:46, 19.21it/s, loss=0.0792]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 26%|██▌       | 3004/11692 [27:14<1:19:31,  1.82it/s, loss=0.1]   IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 85%|████████▍ | 9890/11692 [1:29:56<16:22,  1.83it/s, loss=0.099] IOPub message rate exceeded.
The notebook server will temporarily stop sending outpu

Epoch = 15, AUC = 0.9106483410198309
EarlyStopping counter: 3 out of 5


  3%|▎         | 384/11692 [03:32<1:44:00,  1.81it/s, loss=0.0896]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 55%|█████▌    | 6449/11692 [58:51<46:20,  1.89it/s, loss=0.0962]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 62%|██████▏   | 7196/11692 [1:05:40<43:19,  1.73it/s, loss=0.0957]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 55%|█████▍    | 3195/5846 [03:00<02:33, 17.32it/s, loss=0.0708]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 33%|███▎      | 3837/11692 [34:51<1:08:45,  1.90it/s, loss=0.0742]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 39%|███▊      | 4526/11692 [41:08<1:04:44,  1.84it/s, loss=0.0743]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 90%|████████▉ | 10475/11692 [1:35:27<10:41,  1.90it/s, loss=0.0702]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 96%|█████████▋| 11266/11692 [1:42:41<03:53,  1.83it/s, loss=0.0698]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 17%|█▋        | 1952/11700 [18:23<1:28:47,  1.83it/s, loss=0.239]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 76%|███████▌  | 8891/11700 [1:42:54<32:57,  1.42it/s, loss=0.227]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11700/11700 [2:16:28<00:00,  1.43it/s, loss=0.222]
100%|██████████| 5830/5830 [06:29<00:00, 14.96it/s, loss=0.203] 
  0%|          | 0/11700 [00:00<?, ?it/s]

Epoch = 0, AUC = 0.8859618637302835
Validation score improved (-inf --> 0.8859618637302835). Saving model!


 12%|█▏        | 1382/11700 [16:41<2:01:12,  1.42it/s, loss=0.21] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 15%|█▌        | 1781/11700 [21:33<1:56:57,  1.41it/s, loss=0.213]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 41%|████      | 4806/11700 [56:53<1:20:33,  1.43it/s, loss=0.202]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 61%|██████    | 7101/11700 [1:24:10<54:12,  1.41it/s, loss=0.199]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 84%|████████▍ | 9880/11700 [1:57:45<21:24,  1.42it/s, loss=0.198]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11700/11700 [2:19:52<00:00,  1.39it/s, loss=0.197]
100%|██████████| 5830/5830 [06:51<00:00, 14.17it/s, loss=0.197] 
  0%|          | 0/11700 [00:00<?, ?it/s]

Epoch = 1, AUC = 0.8956403593814715
Validation score improved (0.8859618637302835 --> 0.8956403593814715). Saving model!


 20%|██        | 2375/11700 [28:35<1:48:47,  1.43it/s, loss=0.188]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 23%|██▎       | 2749/11700 [33:09<1:45:58,  1.41it/s, loss=0.187]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 27%|██▋       | 3166/11700 [38:14<1:40:45,  1.41it/s, loss=0.186]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 70%|███████   | 8201/11700 [1:39:52<1:37:05,  1.66s/it, loss=0.184]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 89%|████████▊ | 10383/11700 [2:16:40<15:50,  1.39it/s, loss=0.184]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11700/11700 [2:33:14<00:00,  1.27it/s, loss=0.183]  
100%|██████████| 5830/5830 [06:16<00:00, 15.47it/s, loss=0.197] 
  0%|          | 0/11700 [00:00<?, ?it/s]

Epoch = 2, AUC = 0.9067849796500126
Validation score improved (0.8956403593814715 --> 0.9067849796500126). Saving model!


  9%|▉         | 1038/11700 [12:46<2:07:39,  1.39it/s, loss=0.169]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 20%|██        | 2397/11700 [29:04<1:50:51,  1.40it/s, loss=0.177]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 56%|█████▌    | 6521/11700 [1:18:46<1:02:24,  1.38it/s, loss=0.176]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 78%|███████▊  | 9173/11700 [1:50:49<29:48,  1.41it/s, loss=0.175]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11700/11700 [2:21:20<00:00,  1.38it/s, loss=0.173]
100%|██████████| 5830/5830 [06:39<00:00, 14.58it/s, loss=0.191] 
  0%|          | 0/11700 [00:00<?, ?it/s]

Epoch = 3, AUC = 0.9235172737726955
Validation score improved (0.9067849796500126 --> 0.9235172737726955). Saving model!


  0%|          | 56/11700 [00:41<2:18:22,  1.40it/s, loss=0.121]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 22%|██▏       | 2561/11700 [30:30<1:47:50,  1.41it/s, loss=0.169]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 29%|██▉       | 3385/11700 [40:34<1:39:30,  1.39it/s, loss=0.167]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 67%|██████▋   | 7804/11700 [1:33:25<45:52,  1.42it/s, loss=0.163]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 86%|████████▌ | 10032/11700 [2:00:53<19:40,  1.41it/s, loss=0.164]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11700/11700 [2:22:07<00:00,  1.37it/s, loss=0.166]
100%|██████████| 5830/5830 [06:37<00:00, 14.67it/s, loss=0.181] 
  0%|          | 0/11700 [00:00<?, ?it/s]

Epoch = 4, AUC = 0.921462468792444
EarlyStopping counter: 1 out of 5


  7%|▋         | 799/11700 [09:44<2:08:38,  1.41it/s, loss=0.159]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 32%|███▏      | 3686/11700 [44:49<1:34:30,  1.41it/s, loss=0.16] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 62%|██████▏   | 7292/11700 [1:22:20<37:50,  1.94it/s, loss=0.16]   

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 86%|████████▋ | 10103/11700 [1:47:28<13:49,  1.93it/s, loss=0.16]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11700/11700 [2:01:45<00:00,  1.60it/s, loss=0.159]
100%|██████████| 5830/5830 [04:49<00:00, 20.17it/s, loss=0.193] 
  0%|          | 0/11700 [00:00<?, ?it/s]

Epoch = 5, AUC = 0.9272150712538785
Validation score improved (0.9235172737726955 --> 0.9272150712538785). Saving model!


  7%|▋         | 841/11700 [07:31<1:32:43,  1.95it/s, loss=0.146]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 26%|██▌       | 2994/11700 [26:46<1:14:20,  1.95it/s, loss=0.154]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 44%|████▍     | 5131/11700 [45:48<56:04,  1.95it/s, loss=0.15]   

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 45%|████▍     | 5257/11700 [46:55<55:02,  1.95it/s, loss=0.15] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 79%|███████▉  | 9273/11700 [1:25:46<20:42,  1.95it/s, loss=0.15]   

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 99%|█████████▉| 11582/11700 [1:46:19<01:00,  1.96it/s, loss=0.151]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11700/11700 [1:47:22<00:00,  1.82it/s, loss=0.151]
100%|██████████| 5830/5830 [04:45<00:00, 20.45it/s, loss=0.182] 
  0%|          | 0/11700 [00:00<?, ?it/s]

Epoch = 6, AUC = 0.9184329468493961
EarlyStopping counter: 1 out of 5


 24%|██▍       | 2791/11700 [48:00<1:34:28,  1.57it/s, loss=0.144]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 27%|██▋       | 3132/11700 [54:30<1:24:52,  1.68it/s, loss=0.144] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 74%|███████▍  | 8680/11700 [1:47:52<27:45,  1.81it/s, loss=0.145]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 82%|████████▏ | 9622/11700 [1:56:16<18:26,  1.88it/s, loss=0.145]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 5830/5830 [04:43<00:00, 20.59it/s, loss=0.179]
  0%|          | 0/11700 [00:00<?, ?it/s]

Epoch = 7, AUC = 0.9224889688954085
EarlyStopping counter: 2 out of 5


  5%|▌         | 596/11700 [05:17<1:34:43,  1.95it/s, loss=0.141]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


  6%|▋         | 751/11700 [06:40<1:37:06,  1.88it/s, loss=0.137]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 54%|█████▍    | 6335/11700 [56:12<45:50,  1.95it/s, loss=0.139]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 63%|██████▎   | 7350/11700 [1:05:15<38:39,  1.88it/s, loss=0.139]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 51%|█████▏    | 2991/5830 [02:27<02:15, 20.95it/s, loss=0.0797]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 30%|███       | 3536/11700 [31:28<1:09:40,  1.95it/s, loss=0.131]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 37%|███▋      | 4299/11700 [38:15<1:02:58,  1.96it/s, loss=0.13] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 41%|████      | 4763/11700 [42:23<1:01:28,  1.88it/s, loss=0.129]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 97%|█████████▋| 11332/11700 [1:40:51<03:20,  1.84it/s, loss=0.129]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 15%|█▌        | 1762/11700 [16:29<1:24:43,  1.96it/s, loss=0.112]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 18%|█▊        | 2103/11700 [19:32<1:25:06,  1.88it/s, loss=0.112]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 65%|██████▍   | 7604/11700 [1:08:31<34:50,  1.96it/s, loss=0.107]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 75%|███████▍  | 8755/11700 [1:18:46<26:09,  1.88it/s, loss=0.106]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 96%|█████████▋| 5615/5830 [04:36<00:10, 20.70it/s, loss=0.185] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 46%|████▌     | 5327/11700 [47:25<54:08,  1.96it/s, loss=0.0921]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 56%|█████▌    | 6521/11700 [58:03<46:46,  1.85it/s, loss=0.0918]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 11700/11700 [1:44:07<00:00,  1.87it/s, loss=0.0936]
 28%|██▊       | 1639/5830 [01:20<03:30, 19.88it/s, loss=0.0785]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 31%|███       | 3616/11700 [32:12<1:09:06,  1.95it/s, loss=0.0893]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 33%|███▎      | 3881/11700 [34:34<1:10:12,  1.86it/s, loss=0.091] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 88%|████████▊ | 10317/11700 [1:31:57<12:01,  1.92it/s, loss=0.0869]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 95%|█████████▌| 11152/11700 [1:39:23<04:41,  1.95it/s, loss=0.0877]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 11700/11700 [1:44:16<00:00,  1.87it/s, loss=0.0872]
100%|██████████| 5830/5830 [04:42<00:00, 20.60it/s, loss=0.207] 
  0%|          | 0/11700 [00:00<?, ?it/s]

Epoch = 12, AUC = 0.9247380416347553
EarlyStopping counter: 2 out of 5


 14%|█▎        | 1593/11700 [14:12<1:26:45,  1.94it/s, loss=0.0817]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 35%|███▍      | 4047/11700 [36:04<1:05:23,  1.95it/s, loss=0.0826]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 64%|██████▍   | 7494/11700 [1:06:47<35:57,  1.95it/s, loss=0.086] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 85%|████████▌ | 9989/11700 [1:29:01<14:35,  1.95it/s, loss=0.0863]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 11700/11700 [1:44:15<00:00,  1.87it/s, loss=0.0861]
100%|██████████| 5830/5830 [04:40<00:00, 20.80it/s, loss=0.202] 
  0%|          | 0/11700 [00:00<?, ?it/s]

Epoch = 13, AUC = 0.9263500290383562
EarlyStopping counter: 3 out of 5


 17%|█▋        | 1996/11700 [17:45<1:22:40,  1.96it/s, loss=0.0818]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 43%|████▎     | 5056/11700 [45:52<56:31,  1.96it/s, loss=0.0815]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 67%|██████▋   | 7895/11700 [1:11:03<32:22,  1.96it/s, loss=0.081] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 91%|█████████▏| 10688/11700 [1:35:48<08:35,  1.96it/s, loss=0.0807]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 11700/11700 [1:44:45<00:00,  1.86it/s, loss=0.0796]
100%|██████████| 5830/5830 [04:40<00:00, 20.77it/s, loss=0.203] 
  0%|          | 0/11700 [00:00<?, ?it/s]

Epoch = 14, AUC = 0.9264599351669532
EarlyStopping counter: 4 out of 5


 10%|█         | 1213/11700 [10:47<1:28:54,  1.97it/s, loss=0.0757]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 46%|████▋     | 5420/11700 [48:17<53:39,  1.95it/s, loss=0.0782]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 51%|█████     | 5979/11700 [53:16<48:52,  1.95it/s, loss=0.0789]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 74%|███████▍  | 8646/11700 [1:17:15<26:09,  1.95it/s, loss=0.0778]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 95%|█████████▍| 11083/11700 [1:39:04<05:16,  1.95it/s, loss=0.0777]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 11700/11700 [1:44:38<00:00,  1.86it/s, loss=0.0783]
100%|██████████| 5830/5830 [05:10<00:00, 18.75it/s, loss=0.207] 


Epoch = 15, AUC = 0.9240033133805505
EarlyStopping counter: 5 out of 5
Early stopping


  0%|          | 0/11702 [00:00<?, ?it/s]

Loaded pretrained weights for efficientnet-b3
Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


  1%|          | 74/11702 [00:41<1:42:07,  1.90it/s, loss=0.353]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 18%|█▊        | 2161/11702 [19:54<1:23:24,  1.91it/s, loss=0.245]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 42%|████▏     | 4970/11702 [45:43<58:31,  1.92it/s, loss=0.237]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 61%|██████    | 7131/11702 [1:05:35<39:50,  1.91it/s, loss=0.229]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 63%|██████▎   | 7407/11702 [1:08:06<38:06,  1.88it/s, loss=0.228]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 82%|████████▏ | 9593/11702 [1:28:09<18:23,  1.91it/s, loss=0.224]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11702/11702 [1:47:29<00:00,  1.81it/s, loss=0.221]
100%|██████████| 5825/5825 [05:14<00:00, 18.55it/s, loss=0.204] 
  0%|          | 0/11702 [00:00<?, ?it/s]

Epoch = 0, AUC = 0.8885840769944721
Validation score improved (-inf --> 0.8885840769944721). Saving model!


  3%|▎         | 357/11702 [03:17<1:39:07,  1.91it/s, loss=0.196]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 31%|███▏      | 3675/11702 [33:43<1:09:49,  1.92it/s, loss=0.197]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 55%|█████▍    | 6389/11702 [58:36<46:06,  1.92it/s, loss=0.198]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 77%|███████▋  | 9020/11702 [1:22:43<23:25,  1.91it/s, loss=0.196]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 99%|█████████▉| 11563/11702 [1:46:04<01:12,  1.91it/s, loss=0.196]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11702/11702 [1:47:20<00:00,  1.82it/s, loss=0.196]
100%|██████████| 5825/5825 [05:14<00:00, 18.52it/s, loss=0.216] 
  0%|          | 0/11702 [00:00<?, ?it/s]

Epoch = 1, AUC = 0.905548811888472
Validation score improved (0.8885840769944721 --> 0.905548811888472). Saving model!


 17%|█▋        | 2014/11702 [18:19<1:24:24,  1.91it/s, loss=0.198]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 38%|███▊      | 4462/11702 [40:30<1:02:27,  1.93it/s, loss=0.192]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 59%|█████▉    | 6953/11702 [1:03:05<40:48,  1.94it/s, loss=0.186]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 77%|███████▋  | 9028/11702 [1:21:53<23:04,  1.93it/s, loss=0.183]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11702/11702 [1:46:00<00:00,  1.84it/s, loss=0.183]
100%|██████████| 5825/5825 [04:39<00:00, 20.82it/s, loss=0.19]  
  0%|          | 0/11702 [00:00<?, ?it/s]

Epoch = 2, AUC = 0.9078516905983249
Validation score improved (0.905548811888472 --> 0.9078516905983249). Saving model!


  1%|          | 109/11702 [00:59<1:39:17,  1.95it/s, loss=0.218]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 23%|██▎       | 2717/11702 [24:29<1:17:05,  1.94it/s, loss=0.179]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 36%|███▌      | 4172/11702 [37:36<1:07:46,  1.85it/s, loss=0.176]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 88%|████████▊ | 10257/11702 [1:32:25<12:22,  1.95it/s, loss=0.172]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 94%|█████████▍| 11014/11702 [1:39:15<06:10,  1.86it/s, loss=0.171]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 19%|█▉        | 2225/11702 [20:02<1:25:04,  1.86it/s, loss=0.163]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 74%|███████▍  | 8662/11702 [1:18:15<26:03,  1.94it/s, loss=0.166]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 75%|███████▍  | 8731/11702 [1:18:52<26:54,  1.84it/s, loss=0.166]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 5825/5825 [04:39<00:00, 20.83it/s, loss=0.187]
  0%|          | 0/11702 [00:00<?, ?it/s]

Epoch = 4, AUC = 0.9133620375757873
Validation score improved (0.9078516905983249 --> 0.9133620375757873). Saving model!


  2%|▏         | 240/11702 [02:10<1:45:31,  1.81it/s, loss=0.142]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 61%|██████▏   | 7188/11702 [1:04:38<38:38,  1.95it/s, loss=0.152]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 63%|██████▎   | 7333/11702 [1:05:56<39:12,  1.86it/s, loss=0.152]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 48%|████▊     | 2823/5825 [02:13<02:21, 21.24it/s, loss=0.0703]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 38%|███▊      | 4392/11702 [39:27<1:02:37,  1.95it/s, loss=0.147]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 42%|████▏     | 4956/11702 [44:31<1:00:35,  1.86it/s, loss=0.147]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 92%|█████████▏| 10731/11702 [1:36:21<08:18,  1.95it/s, loss=0.15] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 11702/11702 [1:45:05<00:00,  1.86it/s, loss=0.149]
  4%|▍         | 238/5825 [00:11<04:25, 21.02it/s, loss=0.0484]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 27%|██▋       | 3202/11702 [28:46<1:16:35,  1.85it/s, loss=0.142]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 79%|███████▉  | 9251/11702 [1:23:05<20:58,  1.95it/s, loss=0.141]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 97%|█████████▋| 11388/11702 [1:42:17<02:41,  1.95it/s, loss=0.142]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11702/11702 [1:45:06<00:00,  1.86it/s, loss=0.142]
100%|██████████| 5825/5825 [04:36<00:00, 21.08it/s, loss=0.19]  
  0%|          | 0/11702 [00:00<?, ?it/s]

Epoch = 7, AUC = 0.9211809294296603
EarlyStopping counter: 2 out of 5


 15%|█▌        | 1768/11702 [15:53<1:25:10,  1.94it/s, loss=0.135]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 33%|███▎      | 3809/11702 [34:20<1:07:48,  1.94it/s, loss=0.136]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 40%|████      | 4710/11702 [42:27<59:58,  1.94it/s, loss=0.135]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 70%|██████▉   | 8161/11702 [1:22:45<41:47,  1.41it/s, loss=0.135]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 96%|█████████▌| 11248/11702 [2:00:23<04:27,  1.70it/s, loss=0.135] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 11702/11702 [2:04:57<00:00,  1.56it/s, loss=0.135]
100%|██████████| 5825/5825 [04:40<00:00, 20.73it/s, loss=0.187] 
  0%|          | 0/11702 [00:00<?, ?it/s]

Epoch = 8, AUC = 0.91911157109237
EarlyStopping counter: 3 out of 5


 28%|██▊       | 3222/11702 [29:05<1:13:00,  1.94it/s, loss=0.131]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 62%|██████▏   | 7295/11702 [1:05:50<38:10,  1.92it/s, loss=0.128]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 80%|███████▉  | 9326/11702 [1:24:17<20:21,  1.95it/s, loss=0.128]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 97%|█████████▋| 11383/11702 [1:42:54<02:44,  1.94it/s, loss=0.13] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 99%|█████████▉| 11570/11702 [1:44:36<01:08,  1.94it/s, loss=0.13]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 11702/11702 [1:45:48<00:00,  1.84it/s, loss=0.13]
100%|██████████| 5825/5825 [04:45<00:00, 20.39it/s, loss=0.205] 
  0%|          | 0/11702 [00:00<?, ?it/s]

Epoch = 9, AUC = 0.9163698895822047
EarlyStopping counter: 4 out of 5


 26%|██▌       | 3059/11702 [27:41<1:17:33,  1.86it/s, loss=0.11] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 36%|███▌      | 4173/11702 [37:42<1:04:34,  1.94it/s, loss=0.109]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 40%|████      | 4738/11702 [42:48<59:51,  1.94it/s, loss=0.109]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 58%|█████▊    | 6735/11702 [1:00:48<44:56,  1.84it/s, loss=0.107]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 42%|████▏     | 2433/5825 [01:56<02:40, 21.09it/s, loss=0.0663]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 35%|███▍      | 4086/11703 [37:28<1:10:56,  1.79it/s, loss=0.24] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, s

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 74%|███████▍  | 8708/11703 [1:20:07<26:16,  1.90it/s, loss=0.23] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 93%|█████████▎| 10885/11703 [1:40:13<07:09,  1.91it/s, loss=0.224]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11703/11703 [1:47:46<00:00,  1.81it/s, loss=0.223]
100%|██████████| 5823/5823 [05:07<00:00, 18.93it/s, loss=0.193] 
  0%|          | 0/11703 [00:00<?, ?it/s]

Epoch = 0, AUC = 0.9007282384876321
Validation score improved (-inf --> 0.9007282384876321). Saving model!


 15%|█▍        | 1711/11703 [15:49<1:28:24,  1.88it/s, loss=0.195]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 49%|████▉     | 5714/11703 [52:51<52:20,  1.91it/s, loss=0.201]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 66%|██████▋   | 7759/11703 [1:11:48<34:31,  1.90it/s, loss=0.2]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 82%|████████▏ | 9539/11703 [1:28:16<18:51,  1.91it/s, loss=0.199]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 99%|█████████▉| 11568/11703 [1:47:05<01:11,  1.90it/s, loss=0.196]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11703/11703 [1:48:19<00:00,  1.80it/s, loss=0.196]
100%|██████████| 5823/5823 [05:09<00:00, 18.79it/s, loss=0.244] 
  0%|          | 0/11703 [00:00<?, ?it/s]

Epoch = 1, AUC = 0.8807332839338842
EarlyStopping counter: 1 out of 5


 33%|███▎      | 3870/11703 [35:52<1:08:56,  1.89it/s, loss=0.185]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 40%|████      | 4713/11703 [43:40<1:01:06,  1.91it/s, loss=0.188]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 71%|███████   | 8324/11703 [1:17:03<29:28,  1.91it/s, loss=0.183]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 93%|█████████▎| 10832/11703 [1:40:17<07:45,  1.87it/s, loss=0.183]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11703/11703 [1:48:20<00:00,  1.80it/s, loss=0.183]
100%|██████████| 5823/5823 [05:09<00:00, 18.83it/s, loss=0.196] 
  0%|          | 0/11703 [00:00<?, ?it/s]

Epoch = 2, AUC = 0.9127267658130989
Validation score improved (0.9007282384876321 --> 0.9127267658130989). Saving model!


 22%|██▏       | 2557/11703 [23:45<1:22:04,  1.86it/s, loss=0.171]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 40%|███▉      | 4629/11703 [42:59<1:03:44,  1.85it/s, loss=0.172]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 72%|███████▏  | 8479/11703 [1:18:48<28:43,  1.87it/s, loss=0.173]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|█████████▉| 11680/11703 [1:49:24<00:12,  1.89it/s, loss=0.172]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11703/11703 [1:49:37<00:00,  1.78it/s, loss=0.172]
100%|██████████| 5823/5823 [05:19<00:00, 18.20it/s, loss=0.188] 
  0%|          | 0/11703 [00:00<?, ?it/s]

Epoch = 3, AUC = 0.9015980194296225
EarlyStopping counter: 1 out of 5


 19%|█▉        | 2202/11703 [20:30<1:24:04,  1.88it/s, loss=0.162]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 48%|████▊     | 5641/11703 [52:32<53:18,  1.90it/s, loss=0.165]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 74%|███████▍  | 8642/11703 [1:20:26<27:36,  1.85it/s, loss=0.164]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11703/11703 [1:48:48<00:00,  1.79it/s, loss=0.163]
100%|██████████| 5823/5823 [05:01<00:00, 19.29it/s, loss=0.191] 
  0%|          | 0/11703 [00:00<?, ?it/s]

Epoch = 4, AUC = 0.9160503837145655
Validation score improved (0.9127267658130989 --> 0.9160503837145655). Saving model!


  1%|          | 95/11703 [00:53<1:42:01,  1.90it/s, loss=0.124] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 18%|█▊        | 2141/11703 [19:52<1:23:44,  1.90it/s, loss=0.157]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 41%|████      | 4750/11703 [44:05<1:01:09,  1.90it/s, loss=0.159]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 61%|██████    | 7161/11703 [1:06:25<39:43,  1.91it/s, loss=0.157]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 71%|███████   | 8326/11703 [1:17:13<31:18,  1.80it/s, loss=0.157]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 78%|███████▊  | 4569/5823 [03:58<01:02, 19.93it/s, loss=0.13]  IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 44%|████▍     | 5182/11703 [47:55<1:00:13,  1.80it/s, loss=0.149]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, s

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11703/11703 [1:48:11<00:00,  1.80it/s, loss=0.15] 
  1%|          | 36/5823 [00:02<05:32, 17.42it/s, loss=0.0141] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

  8%|▊         | 927/11703 [08:34<1:34:41,  1.90it/s, loss=0.145]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 20%|██        | 2384/11703 [22:02<1:26:02,  1.81it/s, loss=0.14] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 77%|███████▋  | 9011/11703 [1:23:14<24:37,  1.82it/s, loss=0.142]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 94%|█████████▎| 5449/5823 [04:42<00:20, 18.25it/s, loss=0.168] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, s

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11703/11703 [1:47:56<00:00,  1.81it/s, loss=0.136]
 29%|██▉       | 1677/5823 [01:26<03:34, 19.30it/s, loss=0.0744]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 21%|██        | 2485/11703 [22:57<1:20:45,  1.90it/s, loss=0.116]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 32%|███▏      | 3751/11703 [34:39<1:12:52,  1.82it/s, loss=0.115]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 89%|████████▊ | 10366/11703 [1:35:46<12:21,  1.80it/s, loss=0.109]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 5823/5823 [05:00<00:00, 19.37it/s, loss=0.223]
  0%|          | 0/11703 [00:00<?, ?it/s]

Epoch = 9, AUC = 0.9073280452340082
EarlyStopping counter: 3 out of 5


  6%|▋         | 736/11703 [06:49<1:41:41,  1.80it/s, loss=0.0865]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 49%|████▉     | 5712/11703 [52:46<52:18,  1.91it/s, loss=0.0966]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


 55%|█████▌    | 6486/11703 [59:55<45:42,  1.90it/s, loss=0.0969]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 83%|████████▎ | 9739/11703 [1:29:59<17:08,  1.91it/s, loss=0.0948]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


100%|██████████| 11703/11703 [1:48:08<00:00,  1.80it/s, loss=0.0965]
100%|██████████| 5823/5823 [04:59<00:00, 19.42it/s, loss=0.221] 
  0%|          | 0/11703 [00:00<?, ?it/s]

Epoch = 10, AUC = 0.9078878384265655
EarlyStopping counter: 4 out of 5


  9%|▉         | 1055/11703 [09:43<1:33:13,  1.90it/s, loss=0.0929]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 32%|███▏      | 3695/11703 [34:08<1:09:44,  1.91it/s, loss=0.0892]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 52%|█████▏    | 6121/11703 [56:34<48:46,  1.91it/s, loss=0.0899]  

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 8192.0


 87%|████████▋ | 10165/11703 [1:33:56<13:24,  1.91it/s, loss=0.0929]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


100%|██████████| 11703/11703 [1:48:10<00:00,  1.80it/s, loss=0.0931]
100%|██████████| 5823/5823 [05:02<00:00, 19.27it/s, loss=0.22]  


Epoch = 11, AUC = 0.9094980991141611
EarlyStopping counter: 5 out of 5
Early stopping


In [12]:
# Display clickable links to the files under ../models/ as this cell's output,
# so they can be downloaded from the notebook UI. FileLinks must stay the last
# bare expression of the cell for its rich repr to be rendered.
# (FileLink, the single-file variant, is imported but not used in this cell.)
# NOTE(review): presumably ../models/ is where the training run above wrote its
# checkpoints (the log prints "Saving model!") — confirm against the training cell.
from IPython.display import FileLink, FileLinks
FileLinks('../models/')