In [1]:
# !pip install efficientnet_pytorch
# !pip install pretrainedmodels
# !pip install albumentations
# !pip install pandas
# !pip install scikit-learn

In [2]:

import torch
import albumentations

import numpy as np
import pandas as pd

import torch.nn as nn
from sklearn import metrics
from sklearn import model_selection
from torch.nn import functional as F
from tqdm import tqdm
from efficientnet_pytorch import EfficientNet
import pretrainedmodels

from albumentations.pytorch import ToTensor
from torchvision import transforms

import albumentations as A
import random
import cv2

In [3]:
# Names of the tabular columns: anatomy site, sex, age bucket and image stats.
cols = (
    # anatomy-site columns
    ['head/neck', 'lower extremity', 'oral/genital', 'palms/soles',
     'torso', 'unknown_anatomy', 'upper extremity']
    # sex columns
    + ['female', 'male', 'unknown_sex']
    # age buckets: 0.0, then every 5 years from 10.0 through 90.0 (no 5.0 bucket)
    + [f'age__{age:.1f}' for age in (0, *range(10, 95, 5))]
    # per-image statistics
    + ['n_images', 'image_size']
)

In [4]:
import torch

import numpy as np

from PIL import Image
from PIL import ImageFile


# Let PIL decode image files whose data is truncated instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True


class ClassificationLoader:
    """Map-style dataset yielding an image tensor and its target.

    Every item is a dict with the keys ``"image"`` (float tensor, channels
    first) and ``"targets"`` (long tensor).

    Args:
        image_paths: Sequence of image file paths.
        targets: Sequence of labels, indexed the same way as ``image_paths``.
        resize: ``(height, width)`` to resize each image to, or ``None`` to
            keep the stored size.
        augmentations: Optional callable invoked as
            ``augmentations(image=array)`` that returns a mapping with an
            ``"image"`` entry.
    """

    def __init__(self, image_paths, targets, resize,augmentations=None):
        self.image_paths = image_paths
        self.targets = targets
        self.resize = resize
        self.augmentations = augmentations
        # Tabular features are currently disabled:
        # self.tabularDF = tabularDF
        # self.cols = cols

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load, resize, augment and tensorise the sample at index ``item``."""
        # Force three channels: a grayscale, CMYK or RGBA file would otherwise
        # break the channel-first transpose below (or yield 4 channels).
        # The context manager closes the file once the pixels are copied.
        with Image.open(self.image_paths[item]) as handle:
            image = handle.convert("RGB")
        targets = self.targets[item]

        if self.resize is not None:
            # PIL expects (width, height); ``resize`` is stored as (height, width).
            image = image.resize(
                (self.resize[1], self.resize[0]), resample=Image.BILINEAR
            )

        image = np.array(image)
        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]

        # HWC -> CHW, the layout the model consumes.
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        # tabFeats = self.tabularDF.loc[item,cols].values.astype(float)

        return {
            "image": torch.tensor(image, dtype=torch.float),
            "targets": torch.tensor(targets, dtype=torch.long)
            # "tabfeats":torch.tensor(np.array(tabFeats),dtype=torch.float)
        }


In [5]:
class AverageMeter:
    """Track the most recent value together with a weighted running mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the latest value, the mean, the weighted sum and the count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and recompute the mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [6]:
import torch
import numpy as np


class EarlyStopping:
    """Stop training when a monitored score stops improving.

    Call the instance once per epoch with the epoch's score. When the score
    improves on the best seen so far by at least ``delta``, the model's
    ``state_dict`` is saved to ``model_path`` and the patience counter is
    reset. Otherwise the counter increases, and ``early_stop`` becomes
    ``True`` once it reaches ``patience``.

    Args:
        patience: Number of consecutive non-improving epochs tolerated.
        mode: ``"max"`` if larger scores are better, ``"min"`` if smaller.
        delta: Minimum increase (after sign normalisation) that counts as an
            improvement.
    """

    def __init__(self, patience=7, mode="max", delta=0.0001):
        self.patience = patience
        self.counter = 0
        self.mode = mode
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        # ``np.Inf`` was removed in NumPy 2.0; ``np.inf`` works everywhere.
        if self.mode == "min":
            self.val_score = np.inf
        else:
            self.val_score = -np.inf

    def __call__(self, epoch_score, model, model_path):
        """Register ``epoch_score`` and checkpoint ``model`` if it improved."""
        # Normalise so that "bigger is better" regardless of mode.
        if self.mode == "min":
            score = -1.0 * epoch_score
        else:
            score = np.copy(epoch_score)

        if not np.isfinite(score):
            # A NaN/inf score must never become the reference: every later
            # comparison against NaN is False and would count as an improvement.
            improved = False
        elif self.best_score is None:
            improved = True
        else:
            improved = not (score < self.best_score + self.delta)

        if improved:
            self.best_score = score
            self.save_checkpoint(epoch_score, model, model_path)
            self.counter = 0
        else:
            self.counter += 1
            print(
                "EarlyStopping counter: {} out of {}".format(
                    self.counter, self.patience
                )
            )
            if self.counter >= self.patience:
                self.early_stop = True

    def save_checkpoint(self, epoch_score, model, model_path):
        """Save ``model.state_dict()`` to ``model_path`` for a finite score.

        ``val_score`` is updated to ``epoch_score`` in every case.
        """
        # ``x not in [..., np.nan]`` cannot detect a computed NaN because
        # NaN != NaN; ``np.isfinite`` rejects NaN and +/-inf reliably.
        if np.isfinite(epoch_score):
            print(
                "Validation score improved ({} --> {}). Saving model!".format(
                    self.val_score, epoch_score
                )
            )
            torch.save(model.state_dict(), model_path)
        self.val_score = epoch_score

In [7]:
class Engine:
    """Stateless train / evaluate / predict loops.

    Each batch from the loader is a dict of tensors. It is moved to
    ``device`` and passed to the model as ``model(**batch)``, which must
    return a ``(predictions, loss)`` pair.
    """

    @staticmethod
    def train(
        data_loader,
        model,
        optimizer,
        device,
        scheduler=None,
        accumulation_steps=1,
        use_tpu=False,
        fp16=False,
    ):
        """Run one training epoch and return the running-average loss.

        Args:
            data_loader: Iterable of dict batches; must expose ``batch_size``
                and ``__len__``.
            model: Module returning ``(predictions, loss)``.
            optimizer: Optimizer stepped every ``accumulation_steps`` batches.
            device: Target device for every tensor in a batch.
            scheduler: Optional scheduler stepped after each optimizer step.
            accumulation_steps: Batches to accumulate before stepping
                (forced to 1 when ``fp16`` is set).
            use_tpu: Step through ``torch_xla`` instead of ``optimizer.step``.
            fp16: Scale the loss with apex ``amp`` before backward.

        Raises:
            Exception: If TPU or fp16 support is requested but the backing
                package is missing, or if both are requested together.
        """
        try:
            from apex import amp
            _apex_available = True
        except ImportError:
            _apex_available = False

        # Resolve the XLA module lazily; it is only needed on the TPU path.
        # Previously ``_xla_available`` and ``xm`` were never defined, so
        # ``use_tpu=True`` failed with NameError.
        xm = None
        if use_tpu:
            try:
                import torch_xla.core.xla_model as xm
            except ImportError:
                raise Exception(
                    "You want to use TPUs but you dont have pytorch_xla installed"
                )
        if fp16 and not _apex_available:
            raise Exception("You want to use fp16 but you dont have apex installed")
        if fp16 and use_tpu:
            raise Exception("Apex fp16 is not available when using TPUs")
        if fp16:
            accumulation_steps = 1

        losses = AverageMeter()
        model.train()
        # Start the epoch from clean gradients.
        optimizer.zero_grad()
        tk0 = tqdm(data_loader, total=len(data_loader), disable=use_tpu)
        for b_idx, data in enumerate(tk0):
            for key, value in data.items():
                data[key] = value.to(device)
            _, loss = model(**data)

            if use_tpu:
                loss.backward()
                xm.optimizer_step(optimizer)
                if scheduler is not None:
                    scheduler.step()
                optimizer.zero_grad()
            else:
                with torch.set_grad_enabled(True):
                    if fp16:
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()
                    if (b_idx + 1) % accumulation_steps == 0:
                        optimizer.step()
                        if scheduler is not None:
                            scheduler.step()
                        # Always clear after a step. Skipping this for the
                        # first batch let its gradients leak into the second
                        # update when accumulation_steps == 1.
                        optimizer.zero_grad()

            losses.update(loss.item(), data_loader.batch_size)
            tk0.set_postfix(loss=losses.avg)
        return losses.avg

    @staticmethod
    def evaluate(data_loader, model, device, use_tpu=False):
        """Run the model without gradients over ``data_loader``.

        Returns:
            ``(final_predictions, average_loss)``, where ``final_predictions``
            is a list holding one CPU prediction tensor per batch.
        """
        losses = AverageMeter()
        final_predictions = []
        model.eval()
        with torch.no_grad():
            tk0 = tqdm(data_loader, total=len(data_loader), disable=use_tpu)
            for b_idx, data in enumerate(tk0):
                for key, value in data.items():
                    data[key] = value.to(device)
                predictions, loss = model(**data)
                predictions = predictions.cpu()
                losses.update(loss.item(), data_loader.batch_size)
                final_predictions.append(predictions)
                tk0.set_postfix(loss=losses.avg)
        return final_predictions, losses.avg

    @staticmethod
    def predict(data_loader, model, device, use_tpu=False):
        """Return a list of per-batch CPU prediction tensors (loss is ignored)."""
        model.eval()
        final_predictions = []
        with torch.no_grad():
            tk0 = tqdm(data_loader, total=len(data_loader), disable=use_tpu)
            for b_idx, data in enumerate(tk0):
                for key, value in data.items():
                    data[key] = value.to(device)
                predictions, _ = model(**data)
                predictions = predictions.cpu()
                final_predictions.append(predictions)
        return final_predictions

In [8]:
class AdaptiveConcatPool2d(nn.Module):
    """Concatenate adaptive max- and average-pooled features along dim 1.

    Args:
        sz: Output size handed to both pooling layers; a falsy value
            (the default ``None``) selects ``(1, 1)``.
    """

    def __init__(self, sz=None):
        super().__init__()
        output_size = sz if sz else (1, 1)
        self.ap = nn.AdaptiveAvgPool2d(output_size)
        self.mp = nn.AdaptiveMaxPool2d(output_size)

    def forward(self, x):
        """Return ``[max_pool(x), avg_pool(x)]`` joined on dimension 1."""
        pooled_max = self.mp(x)
        pooled_avg = self.ap(x)
        return torch.cat((pooled_max, pooled_avg), 1)

In [9]:
class LabelSmoothing(nn.Module):
    """Binary cross-entropy on logits with targets pulled towards 0.5.

    Args:
        smoothing: Each target ``t`` becomes
            ``t * (1 - smoothing) + 0.5 * smoothing``.
    """

    def __init__(self, smoothing = 0.05):
        super().__init__()
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing

    def forward(self, x, target):
        """Return the mean smoothed BCE-with-logits loss of ``x`` vs ``target``."""
        logits = x.float()
        soft_target = target.float() * (1 - self.smoothing) + 0.5 * self.smoothing
        bce = F.binary_cross_entropy_with_logits(logits, soft_target.type_as(logits))
        return bce.mean()

# y_smo = y.float() * (1 - label_smoothing) + 0.5 * label_smoothing
#         loss  = F.binary_cross_entropy_with_logits(y_hat, y_smo.type_as(y_hat),
#                                                    pos_weight=torch.tensor(pos_weight))

In [10]:
# class LabelSmoothing(nn.Module):
#     def __init__(self, smoothing = 0.05):
#         super(LabelSmoothing, self).__init__()
#         self.confidence = 1.0 - smoothing
#         self.smoothing = smoothing

#     def forward(self, x, target):
#         x = x.float()
#         target = target.float()
#         logprobs = torch.nn.functional.log_softmax(x, dim = -1)

#         nll_loss = -logprobs * target
#         nll_loss = nll_loss.sum(-1)

# #         smooth_loss = -logprobs.mean(dim=-1)
#         smooth_loss = -logprobs * (1-target)
#         smooth_loss = smooth_loss .sum(-1)
#         loss = self.confidence * nll_loss + self.smoothing * smooth_loss

#         return loss.mean()

In [11]:
class Net(nn.Module):
    """Backbone with a single-logit head that also computes its own loss.

    Args:
        arch: Backbone module exposing a final layer ``_fc`` with an
            ``in_features`` attribute; that layer is replaced in place.
    """

    def __init__(self, arch):
        super().__init__()
        self.arch = arch
        # Swap the backbone's final layer for a one-output linear head.
        self.arch._fc = nn.Linear(
            in_features=arch._fc.in_features, out_features=1, bias=True
        )

    def forward(self, image,targets):
        """Return ``(logits, loss)`` for a batch.

        No sigmoid is applied here: ``LabelSmoothing`` uses a
        BCE-with-logits loss, which applies the sigmoid internally.
        """
        # Unpacking four values rejects inputs that are not 4-D.
        _, _, _, _ = image.shape
        logits = self.arch(image)
        # Targets are reshaped to a column so they line up with the logits.
        loss_fn = LabelSmoothing()
        loss = loss_fn(logits, targets.view(-1, 1).float())
        return logits, loss

In [12]:
# Load the fold-assignment table and mirror `image_name` into `image_id`,
# the column name the training function reads.
df_folds = pd.read_csv('../input/FinalFolds_combinedExternal.csv')
df_folds = df_folds.assign(image_id=df_folds['image_name'])
df_folds.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target,source,stratify_group,fold,image_id
0,ISIC_2637011,IP_7279968,male,45.0,head/neck,unknown,benign,0,ISIC20,20,0,ISIC_2637011
1,ISIC_0015719,IP_3075186,female,45.0,upper extremity,unknown,benign,0,ISIC20,2,1,ISIC_0015719
2,ISIC_0052212,IP_2842074,female,50.0,lower extremity,nevus,benign,0,ISIC20,0,4,ISIC_0052212
3,ISIC_0068279,IP_6890425,female,45.0,head/neck,unknown,benign,0,ISIC20,2,3,ISIC_0068279
4,ISIC_0074268,IP_8723313,female,55.0,upper extremity,unknown,benign,0,ISIC20,1,3,ISIC_0074268


In [13]:
class Microscope(A.ImageOnlyTransform):
    """Zero every pixel outside a randomly sized circle centered on the image.

    Args:
        p: Probability forwarded to the base transform; ``apply`` also
            draws against it.
        always_apply: Forwarded to the base transform.
    """

    def __init__(self, p: float = 0.5, always_apply=False):
        super().__init__(always_apply, p)

    def apply(self, img, **params):
        """Return ``img`` multiplied by a circular 0/1 mask, or unchanged."""
        # NOTE(review): the albumentations base class presumably already gates
        # the call on `p`, so this second draw lowers the effective
        # probability - confirm whether that is intended.
        if random.random() < self.p:
            height, width = img.shape[:2]
            circle = cv2.circle(
                (np.ones(img.shape) * 255).astype(np.uint8),
                # OpenCV points are (x, y) = (column, row). The previous
                # (rows // 2, cols // 2) was off-center on non-square images.
                (width // 2, height // 2),
                random.randint(height // 2 - 3, height // 2 + 15),
                (0, 0, 0),
                -1,
            )

            # uint8 wrap-around: 255 - 255 -> 0 outside the circle and
            # 0 - 255 -> 1 inside it, giving a 0/1 mask.
            mask = circle - 255
            img = np.multiply(img, mask)

        return img

In [14]:
import pretrainedmodels

def train(fold,bs,epochs,fp16,sz,arch='se_resnet152',debug=False,accumulation_steps=1):
    """Train one cross-validation fold, checkpointing on validation AUC gains.

    Rows of the module-level ``df_folds`` with ``fold != fold`` form the
    training split and rows with ``fold == fold`` the validation split.
    Training stops early after 3 epochs without an AUC improvement.

    Args:
        fold: Fold id held out for validation.
        bs: Training batch size; validation uses half of it (at least 1).
        epochs: Maximum number of epochs.
        fp16: Enable apex ``amp`` mixed precision (opt level O1).
        sz: Side length for square resizing, or ``None`` to keep the stored
            image size.
        arch: Model name passed to ``EfficientNet.from_pretrained``.
            NOTE(review): the default ``'se_resnet152'`` does not look like an
            EfficientNet name - confirm or always pass ``arch`` explicitly.
        debug: If true, use only the first 250 rows of each split.
        accumulation_steps: Forwarded to ``Engine.train``.
    """
    import os

    if sz is not None:
        sz = (sz, sz)

    _n = arch  # keep the model name; ``arch`` is rebound to the module below
    training_data_path = '../input/384X384/train/'
    df = df_folds
    device = "cuda"
    train_bs = bs
    # ``bs // 2`` is 0 for bs == 1, which DataLoader rejects.
    valid_bs = max(1, bs // 2)

    df_train = df[df.fold != fold].reset_index(drop=True)
    df_valid = df[df.fold == fold].reset_index(drop=True)

    arch = EfficientNet.from_pretrained(arch)
    model = Net(arch=arch)  # New model for each fold
    model = model.to(device)
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    train_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
            albumentations.CoarseDropout(),
            albumentations.RandomBrightness(0.3),
            albumentations.RandomContrast(0.3),
            albumentations.ChannelShuffle(),
            albumentations.Cutout(4,4,4),
            Microscope(),
            albumentations.ChannelDropout(p=0.1),
            albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=30),
            albumentations.Flip(p=0.5)
        ]
    )

    valid_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
        ]
    )

    # In debug mode keep the first 250 rows of each split; ``[:None]`` keeps all.
    limit = 250 if debug else None

    train_images = [
        os.path.join(training_data_path, i + ".jpg")
        for i in df_train.image_id.values.tolist()[:limit]
    ]
    train_targets = df_train.target.values[:limit]

    valid_images = [
        os.path.join(training_data_path, i + ".jpg")
        for i in df_valid.image_id.values.tolist()[:limit]
    ]
    valid_targets = df_valid.target.values[:limit]

    train_dataset = ClassificationLoader(
        image_paths=train_images,
        targets=train_targets,
        resize=sz,
        augmentations=train_aug,
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=train_bs, shuffle=True, num_workers=4
    )

    valid_dataset = ClassificationLoader(
        image_paths=valid_images,
        targets=valid_targets,
        resize=sz,
        augmentations=valid_aug)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_bs, shuffle=False, num_workers=4
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=2,
        threshold=0.001,
        mode="max"
    )
    if fp16:
        # Import locally so this function does not depend on another notebook
        # cell having imported ``amp`` first.
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
    es = EarlyStopping(patience=3, mode="max")

    # Size tag used in checkpoint names; 384 is used when no resize is requested.
    ss = sz[0] if sz is not None else 384
    # torch.save does not create missing directories.
    os.makedirs("../models", exist_ok=True)

    for epoch in range(epochs):
        train_loss = Engine.train(train_loader, model, optimizer, device=device,fp16=fp16,accumulation_steps=accumulation_steps)
        predictions, valid_loss = Engine.evaluate(
            valid_loader, model, device=device
        )
        # Flatten the per-batch prediction tensors into one score vector.
        predictions = np.vstack((predictions)).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)

        es(auc, model, model_path= "../models/model_arch_{}_sz_{}_fold_{}_epoch_{}_auc_{}.bin".format(_n,ss,fold,epoch,round(auc*100,2)))
        if es.early_stop:
            print("Early stopping")
            break

In [15]:
from apex import amp, optimizers

In [None]:
from apex import amp, optimizers

# Run configuration.
mtype = 'efficientnet-b6'   # model name handed to train() as `arch`
e = 25                      # maximum epochs per fold
bs = 14                     # training batch size
accumulation_steps = 1
apx = True                  # handed to train() as the fp16 flag
debug = False

# Folds 0, 1 and 2 are not run in this cell; only folds 3 and 4 are trained.
train(fold=3, bs=bs, epochs=e, fp16=apx, sz=None, arch=mtype,
      debug=debug, accumulation_steps=accumulation_steps)
train(fold=4, bs=bs, epochs=e, fp16=apx, sz=None, arch=mtype,
      debug=debug, accumulation_steps=accumulation_steps)

Loaded pretrained weights for efficientnet-b6


  0%|          | 0/3341 [00:00<?, ?it/s]

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


  9%|▊         | 284/3341 [02:26<24:28,  2.08it/s, loss=0.365]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 3341/3341 [28:49<00:00,  1.93it/s, loss=0.286]
100%|██████████| 1671/1671 [01:42<00:00, 16.33it/s, loss=0.283]
  0%|          | 0/3341 [00:00<?, ?it/s]

Epoch = 0, AUC = 0.9066865999285747
Validation score improved (-inf --> 0.9066865999285747). Saving model!


 41%|████      | 1358/3341 [11:48<16:18,  2.03it/s, loss=0.265]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 65536.0


 52%|█████▏    | 1753/3341 [15:14<13:36,  1.94it/s, loss=0.264]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 3341/3341 [29:01<00:00,  1.92it/s, loss=0.263]
100%|██████████| 1671/1671 [01:41<00:00, 16.39it/s, loss=0.257]
  0%|          | 0/3341 [00:00<?, ?it/s]

Epoch = 1, AUC = 0.9105796298106755
Validation score improved (0.9066865999285747 --> 0.9105796298106755). Saving model!


 89%|████████▊ | 2957/3341 [25:38<03:13,  1.98it/s, loss=0.251]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 65536.0


100%|██████████| 3341/3341 [28:58<00:00,  1.92it/s, loss=0.252]
100%|██████████| 1671/1671 [01:42<00:00, 16.32it/s, loss=0.25] 


Epoch = 2, AUC = 0.9162162392681101
Validation score improved (0.9105796298106755 --> 0.9162162392681101). Saving model!


 93%|█████████▎| 3108/3341 [26:41<01:49,  2.13it/s, loss=0.243]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 65536.0


100%|██████████| 3341/3341 [28:38<00:00,  1.94it/s, loss=0.243]
100%|██████████| 1671/1671 [01:36<00:00, 17.24it/s, loss=0.247]
  0%|          | 0/3341 [00:00<?, ?it/s]

Epoch = 3, AUC = 0.9160102544838375
EarlyStopping counter: 1 out of 3


  5%|▌         | 179/3341 [01:29<24:25,  2.16it/s, loss=0.217]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 3341/3341 [27:52<00:00,  2.00it/s, loss=0.236]
100%|██████████| 1671/1671 [01:37<00:00, 17.18it/s, loss=0.235]
  0%|          | 0/3341 [00:00<?, ?it/s]

Epoch = 4, AUC = 0.9319396374704483
Validation score improved (0.9162162392681101 --> 0.9319396374704483). Saving model!


 27%|██▋       | 890/3341 [07:25<19:06,  2.14it/s, loss=0.232]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 65536.0


 53%|█████▎    | 1767/3341 [14:44<12:12,  2.15it/s, loss=0.231]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 3341/3341 [27:52<00:00,  2.00it/s, loss=0.228]
100%|██████████| 1671/1671 [01:37<00:00, 17.20it/s, loss=0.244]
  0%|          | 0/3341 [00:00<?, ?it/s]

Epoch = 5, AUC = 0.9348223697641483
Validation score improved (0.9319396374704483 --> 0.9348223697641483). Saving model!


 62%|██████▏   | 2058/3341 [17:30<10:15,  2.09it/s, loss=0.221]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


100%|██████████| 3341/3341 [28:33<00:00,  1.95it/s, loss=0.22] 
 23%|██▎       | 381/1671 [00:23<01:15, 17.10it/s, loss=0.166]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 3341/3341 [28:49<00:00,  1.93it/s, loss=0.215]
 94%|█████████▍| 1577/1671 [01:36<00:05, 16.23it/s, loss=0.225]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 1671/1671 [01:42<00:00, 16.32it/s, loss=0.222]
  0%|          | 0/3341 [00:00<?,

Epoch = 8, AUC = 0.9318910760531409
EarlyStopping counter: 2 out of 3


 14%|█▍        | 478/3341 [04:09<22:54,  2.08it/s, loss=0.202]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 29%|██▉       | 961/3341 [08:21<20:46,  1.91it/s, loss=0.205]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 58%|█████▊    | 1949/3336 [17:46<12:33,  1.84it/s, loss=0.295]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 87%|████████▋ | 2915/3336 [25:17<03:37,  1.94it/s, loss=0.265]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the c

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 65536.0


100%|██████████| 3336/3336 [29:03<00:00,  1.91it/s, loss=0.254]
 18%|█▊        | 301/1681 [00:20<01:34, 14.54it/s, loss=0.166]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 3336/3336 [30:10<00:00,  1.84it/s, loss=0.246]
 91%|█████████▏| 1535/1681 [01:43<00:09, 14.89it/s, loss=0.246]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 1681/1681 [01:52<00:00, 14.97it/s, loss=0.234]
  0%|          | 0/3336 [00:00<?,

Epoch = 4, AUC = 0.9388427108251423
Validation score improved (0.9233435615970853 --> 0.9388427108251423). Saving model!


 22%|██▏       | 743/3336 [06:46<23:33,  1.83it/s, loss=0.227]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 1681/1681 [01:52<00:00, 14.97it/s, loss=0.23] 
  0%|          | 0/3336 [00:00<?, ?it/s]

Epoch = 5, AUC = 0.9392879422043265
Validation score improved (0.9388427108251423 --> 0.9392879422043265). Saving model!


 22%|██▏       | 722/3336 [06:33<23:41,  1.84it/s, loss=0.22] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 53%|█████▎    | 1784/3336 [16:13<13:58,  1.85it/s, loss=0.22] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 24%|██▎       | 785/3336 [07:08<21:20,  1.99it/s, loss=0.21] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 65536.0


 30%|██▉       | 989/3336 [08:59<19:41,  1.99it/s, loss=0.213]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


 48%|████▊     | 1602/3336 [14:33<15:36,  1.85it/s, loss=0.215]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 78%|███████▊  | 2615/3336 [23:45<06:34,  1.83it/s, loss=0.215]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 74%|███████▍  | 2482/3336 [22:34<07:42,  1.85it/s, loss=0.21] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the 