In [1]:
# !pip install efficientnet_pytorch
# !pip install pretrainedmodels
# !pip install albumentations
# !pip install pandas
# !pip install scikit-learn

In [2]:

import torch
import albumentations

import numpy as np
import pandas as pd

import torch.nn as nn
from sklearn import metrics
from sklearn import model_selection
from torch.nn import functional as F
from tqdm import tqdm
from efficientnet_pytorch import EfficientNet
import pretrainedmodels

from albumentations.pytorch import ToTensor
from torchvision import transforms

import albumentations as A
import random
import cv2

In [3]:
# Load the fold table and mirror `image_name` into `image_id`, the column
# that is later used to build image file paths.
df_folds = pd.read_csv('../input/FinalFolds_combinedExternal.csv').assign(
    image_id=lambda frame: frame['image_name']
)
df_folds.head(1)

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target,source,stratify_group,fold,image_id
0,ISIC_2637011,IP_7279968,male,45.0,head/neck,unknown,benign,0,ISIC20,20,0,ISIC_2637011


In [4]:
# Load the metadata table for the test images and preview its first row.
test_df = pd.read_csv(filepath_or_buffer='../input/384X384/test.csv')
test_df.head(1)

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge
0,ISIC_0052060,IP_3579794,male,70.0,


In [5]:
# Relabel the "unknown" sex category in both frames. The anatomical-site column
# is later filled with 'unknown' as well, and the one-hot column names are taken
# from the category values, so the two labels must differ.
for frame in (df_folds, test_df):
    frame['sex'] = frame['sex'].replace({"unknown": 'unknown_sex'})
df_folds['sex'].unique(),test_df['sex'].unique()

(array(['male', 'female', 'unknown_sex'], dtype=object),
 array(['male', 'female'], dtype=object))

In [6]:
### fill missing values
# The filled columns are assigned back instead of calling
# `fillna(..., inplace=True)` on a column selection: that form is chained
# in-place assignment, which only updates a temporary (and leaves the frame
# untouched) once pandas Copy-on-Write is active.
df_folds['sex'] = df_folds['sex'].fillna('unknown_sex')
test_df['sex'] = test_df['sex'].fillna('unknown_sex')

df_folds['anatom_site_general_challenge'] = df_folds['anatom_site_general_challenge'].fillna('unknown')
test_df['anatom_site_general_challenge'] = test_df['anatom_site_general_challenge'].fillna('unknown')

# Missing ages are replaced by the most frequent age of the same frame.
df_folds['age_approx'] = df_folds['age_approx'].fillna(df_folds['age_approx'].mode().values[0])
test_df['age_approx']  = test_df['age_approx'].fillna(test_df['age_approx'].mode().values[0])

In [7]:
# Print the first row of each frame to check the filled values.
print(*(frame.head(1) for frame in (df_folds, test_df)))

     image_name  patient_id   sex  age_approx anatom_site_general_challenge  \
0  ISIC_2637011  IP_7279968  male        45.0                     head/neck   

  diagnosis benign_malignant  target  source  stratify_group  fold  \
0   unknown           benign       0  ISIC20              20     0   

       image_id  
0  ISIC_2637011        image_name  patient_id   sex  age_approx anatom_site_general_challenge
0  ISIC_0052060  IP_3579794  male        70.0                       unknown


In [8]:
### one hot enc for sex, age approx, anatom site general challenge

import gc
def getDummies(col,df_folds=df_folds,test_df=test_df):
    """One-hot encode ``col`` consistently across the train and test frames.

    The column values of both frames are stacked before encoding so that both
    outputs receive exactly the same set of indicator columns, even when a
    category occurs in only one of the frames.

    Args:
        col: Name of the column to encode; it must exist in both frames.
        df_folds: Training frame. The indicator columns are appended by
            position, so the frame is expected to carry a default 0..n-1 index.
        test_df: Test frame, with the same index expectation.

    Returns:
        ``(df_folds, test_df)`` — new frames with the float64 indicator columns
        appended on the right. Columns are named after the category values,
        except for ``'age_approx'`` whose columns get an ``age_`` prefix.
    """
    n_train = len(df_folds)

    # `axis` is passed by keyword: pandas >= 2.0 rejects it as a positional argument.
    combined = pd.concat([df_folds[col], test_df[col]], axis=0, ignore_index=True)

    prefix = 'age' if col == 'age_approx' else None
    dummies = pd.get_dummies(combined, prefix=prefix, dtype=np.float64)

    # Split the stacked encoding back into its train / test parts.
    train_dummies = dummies.iloc[:n_train, :].reset_index(drop=True)
    test_dummies = dummies.iloc[n_train:, :].reset_index(drop=True)

    df_folds = pd.concat([df_folds, train_dummies], axis=1)
    test_df = pd.concat([test_df, test_dummies], axis=1)
    return df_folds,test_df

from tqdm import tqdm
# One-hot encode each categorical metadata column in turn; every pass appends
# the new indicator columns to both frames.
categorical_columns = ['sex', 'age_approx', 'anatom_site_general_challenge']
for column_name in tqdm(categorical_columns):
    df_folds, test_df = getDummies(column_name, df_folds=df_folds, test_df=test_df)

100%|██████████| 3/3 [00:00<00:00,  8.18it/s]


In [9]:
# Add `n_images`: the number of rows (images) that share each row's patient_id,
# counted separately within each frame.
for frame in (df_folds, test_df):
    images_per_patient = frame.groupby('patient_id')['image_name'].count()
    frame['n_images'] = frame['patient_id'].map(images_per_patient)

In [10]:
# Raise the column display limit so the one-hot columns are not elided.
pd.options.display.max_columns = 100
df_folds.head()

Unnamed: 0,image_name,patient_id,sex,age_approx,anatom_site_general_challenge,diagnosis,benign_malignant,target,source,stratify_group,fold,image_id,female,male,unknown_sex,age_0.0,age_5.0,age_10.0,age_15.0,age_20.0,age_25.0,age_30.0,age_35.0,age_40.0,age_45.0,age_50.0,age_51.0,age_55.0,age_60.0,age_65.0,age_70.0,age_75.0,age_80.0,age_85.0,age_90.0,anterior torso,head/neck,lateral torso,lower extremity,oral/genital,palms/soles,posterior torso,torso,unknown,upper extremity,n_images
0,ISIC_2637011,IP_7279968,male,45.0,head/neck,unknown,benign,0,ISIC20,20,0,ISIC_2637011,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,115
1,ISIC_0015719,IP_3075186,female,45.0,upper extremity,unknown,benign,0,ISIC20,2,1,ISIC_0015719,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,24
2,ISIC_0052212,IP_2842074,female,50.0,lower extremity,nevus,benign,0,ISIC20,0,4,ISIC_0052212,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,5
3,ISIC_0068279,IP_6890425,female,45.0,head/neck,unknown,benign,0,ISIC20,2,3,ISIC_0068279,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22
4,ISIC_0074268,IP_8723313,female,55.0,upper extremity,unknown,benign,0,ISIC20,1,3,ISIC_0074268,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,20


In [11]:
# List every column now present, including the generated one-hot columns and n_images.
df_folds.columns

Index(['image_name', 'patient_id', 'sex', 'age_approx',
       'anatom_site_general_challenge', 'diagnosis', 'benign_malignant',
       'target', 'source', 'stratify_group', 'fold', 'image_id', 'female',
       'male', 'unknown_sex', 'age_0.0', 'age_5.0', 'age_10.0', 'age_15.0',
       'age_20.0', 'age_25.0', 'age_30.0', 'age_35.0', 'age_40.0', 'age_45.0',
       'age_50.0', 'age_51.0', 'age_55.0', 'age_60.0', 'age_65.0', 'age_70.0',
       'age_75.0', 'age_80.0', 'age_85.0', 'age_90.0', 'anterior torso',
       'head/neck', 'lateral torso', 'lower extremity', 'oral/genital',
       'palms/soles', 'posterior torso', 'torso', 'unknown', 'upper extremity',
       'n_images'],
      dtype='object')

In [12]:
# Tabular feature columns fed to the network alongside each image.
# The sex indicator is 'unknown_sex'; the plain 'unknown' column belongs to
# anatom_site_general_challenge and must appear only once. The list length has
# to match the input width of the model's tabular layer (34).
cols = ['female','male', 'unknown_sex', 'age_0.0', 'age_5.0', 'age_10.0', 'age_15.0',
        'age_20.0', 'age_25.0', 'age_30.0', 'age_35.0', 'age_40.0', 'age_45.0',
        'age_50.0', 'age_51.0', 'age_55.0', 'age_60.0', 'age_65.0', 'age_70.0',
        'age_75.0', 'age_80.0', 'age_85.0', 'age_90.0', 'anterior torso',
        'head/neck', 'lateral torso', 'lower extremity', 'oral/genital',
        'palms/soles', 'posterior torso', 'torso', 'unknown', 'upper extremity','n_images']

# Guard against accidentally listing the same feature twice.
assert len(set(cols)) == len(cols), "duplicate feature name in cols"

len(cols)

34

In [13]:
# Check that selecting the feature columns yields one column per entry in `cols`.
df_folds.loc[:, cols].shape

(58457, 34)

In [14]:
import torch

import numpy as np

from PIL import Image
from PIL import ImageFile


# Let PIL load image files whose data is truncated instead of raising on them.
ImageFile.LOAD_TRUNCATED_IMAGES = True


class ClassificationLoader:
    """Dataset that pairs each image with its target and a row of tabular features.

    Args:
        image_paths: Sequence of image file paths; its length is the dataset length.
        targets: Sequence of labels, indexed in parallel with ``image_paths``.
        resize: ``None`` to keep the stored image size, otherwise a
            ``(height, width)`` pair (PIL's ``resize`` is called with
            ``(resize[1], resize[0])``).
        tabularDF: DataFrame holding the tabular features. Rows are looked up
            with ``.loc[item, cols]``, so the index labels must line up with the
            positions in ``image_paths``.
        cols: Names of the feature columns to read from ``tabularDF``.
        augmentations: Optional callable invoked as ``augmentations(image=array)``
            and expected to return a dict with an ``"image"`` entry.
    """

    def __init__(self, image_paths, targets, resize,tabularDF=df_folds,cols=cols,augmentations=None):
        self.image_paths = image_paths
        self.targets = targets
        self.resize = resize
        self.augmentations = augmentations
        self.tabularDF = tabularDF
        self.cols = cols

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, item):
        """Return a dict with ``"image"``, ``"targets"`` and ``"tabfeats"`` tensors."""
        image = Image.open(self.image_paths[item])
        targets = self.targets[item]
        if self.resize is not None:
            image = image.resize(
                (self.resize[1], self.resize[0]), resample=Image.BILINEAR
            )
        image = np.array(image)
        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]
        # Channels-last (H, W, C) -> channels-first (C, H, W).
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        # Use the columns given to the constructor, not the module-level `cols`.
        tabFeats = self.tabularDF.loc[item, self.cols].values.astype(float)

        return {
            "image": torch.tensor(image, dtype=torch.float),
            "targets": torch.tensor(targets, dtype=torch.long),
            "tabfeats": torch.tensor(tabFeats, dtype=torch.float)
        }


In [15]:
class AverageMeter:
    """
    Tracks the latest value together with a running weighted sum, count and mean.
    """

    def __init__(self):
        # Same zeroed state that reset() restores.
        self.val = self.avg = self.sum = self.count = 0

    def reset(self):
        """Clear all tracked statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the newest value, weighted by ``n`` in the running mean."""
        self.val = val
        weighted = val * n
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count

In [16]:
import torch
import numpy as np


class EarlyStopping:
    """Stop training when the monitored score stops improving; checkpoint on improvement.

    Args:
        patience: Number of consecutive non-improving calls after which
            ``early_stop`` is set to True.
        mode: ``"max"`` when a larger score is better, ``"min"`` when a smaller
            one is (the score is negated internally in that case).
        delta: Minimum gain over the best score that counts as an improvement.

    Attributes:
        counter: Consecutive calls without improvement.
        best_score: Best (sign-adjusted) score seen so far, or None before the
            first call.
        early_stop: True once ``counter`` has reached ``patience``.
        val_score: Raw score passed to the most recent ``save_checkpoint`` call.
    """

    def __init__(self, patience=7, mode="max", delta=0.0001):
        self.patience = patience
        self.counter = 0
        self.mode = mode
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        # Lowercase `np.inf`: the `np.Inf` alias was removed in NumPy 2.0.
        if self.mode == "min":
            self.val_score = np.inf
        else:
            self.val_score = -np.inf

    def __call__(self, epoch_score, model, model_path):
        """Register the score of one epoch and checkpoint ``model`` if it improved."""
        if self.mode == "min":
            score = -1.0 * epoch_score
        else:
            score = np.copy(epoch_score)

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(epoch_score, model, model_path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(
                "EarlyStopping counter: {} out of {}".format(
                    self.counter, self.patience
                )
            )
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(epoch_score, model, model_path)
            self.counter = 0

    def save_checkpoint(self, epoch_score, model, model_path):
        """Write ``model.state_dict()`` to ``model_path`` unless the score is inf/NaN."""
        # `np.isfinite` rejects +/-inf and every NaN; a list-membership test
        # cannot match a computed NaN because NaN never compares equal.
        if np.isfinite(epoch_score):
            print(
                "Validation score improved ({} --> {}). Saving model!".format(
                    self.val_score, epoch_score
                )
            )
            torch.save(model.state_dict(), model_path)
        self.val_score = epoch_score

In [17]:
class Engine:
    """Static helpers that run one pass of training, evaluation or prediction.

    Every batch yielded by the data loader must be a dict of tensors whose keys
    match the keyword arguments of the model's forward method, and the model
    must return a ``(predictions, loss)`` pair.
    """

    @staticmethod
    def train(
        data_loader,
        model,
        optimizer,
        device,
        scheduler=None,
        accumulation_steps=1,
        use_tpu=False,
        fp16=False,
    ):
        """Train ``model`` for one pass over ``data_loader``.

        Args:
            data_loader: Iterable of batch dicts; must expose ``batch_size``.
            model: Module called as ``model(**batch)``.
            optimizer: Optimizer stepped every ``accumulation_steps`` batches.
            device: Device every batch tensor is moved to.
            scheduler: Optional scheduler, stepped right after each optimizer step.
            accumulation_steps: Number of batches whose gradients are accumulated
                before an optimizer step. Forced to 1 when ``fp16`` is set.
            use_tpu: Step through ``torch_xla`` and hide the progress bar.
            fp16: Scale the loss with apex ``amp`` before the backward pass.

        Returns:
            The running average of the batch losses.

        Raises:
            Exception: If TPU or fp16 support is requested without the matching
                package installed, or if both are requested together.
        """
        try:
            from apex import amp
            _apex_available = True
        except ImportError:
            _apex_available = False

        # torch_xla is only needed (and only imported) when a TPU is requested.
        xm = None
        if use_tpu:
            try:
                import torch_xla.core.xla_model as xm
            except ImportError:
                raise Exception(
                    "You want to use TPUs but you dont have pytorch_xla installed"
                )
        if fp16 and not _apex_available:
            raise Exception("You want to use fp16 but you dont have apex installed")
        if fp16 and use_tpu:
            raise Exception("Apex fp16 is not available when using TPUs")
        if fp16:
            accumulation_steps = 1
        losses = AverageMeter()
        model.train()
        # Start from clean gradients, and clear them again after every optimizer
        # step, so that no batch's gradient leaks into a later update.
        optimizer.zero_grad()
        tk0 = tqdm(data_loader, total=len(data_loader), disable=use_tpu)
        for b_idx, data in enumerate(tk0):
            for key, value in data.items():
                data[key] = value.to(device)
            _, loss = model(**data)

            if not use_tpu:
                with torch.set_grad_enabled(True):
                    if fp16:
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()
                    if (b_idx + 1) % accumulation_steps == 0:
                        optimizer.step()
                        if scheduler is not None:
                            scheduler.step()
                        optimizer.zero_grad()
            else:
                loss.backward()
                xm.optimizer_step(optimizer)
                if scheduler is not None:
                    scheduler.step()
                optimizer.zero_grad()

            losses.update(loss.item(), data_loader.batch_size)
            tk0.set_postfix(loss=losses.avg)
        return losses.avg

    @staticmethod
    def evaluate(data_loader, model, device, use_tpu=False):
        """Run ``model`` in eval mode without gradients over ``data_loader``.

        Returns:
            ``(final_predictions, average_loss)`` where ``final_predictions`` is
            a list holding one CPU prediction tensor per batch.
        """
        losses = AverageMeter()
        final_predictions = []
        model.eval()
        with torch.no_grad():
            tk0 = tqdm(data_loader, total=len(data_loader), disable=use_tpu)
            for b_idx, data in enumerate(tk0):
                for key, value in data.items():
                    data[key] = value.to(device)
                predictions, loss = model(**data)
                predictions = predictions.cpu()
                losses.update(loss.item(), data_loader.batch_size)
                final_predictions.append(predictions)
                tk0.set_postfix(loss=losses.avg)
        return final_predictions, losses.avg

    @staticmethod
    def predict(data_loader, model, device, use_tpu=False):
        """Run ``model`` in eval mode without gradients and collect its predictions.

        Returns:
            A list holding one CPU prediction tensor per batch; the loss returned
            by the model is discarded.
        """
        model.eval()
        final_predictions = []
        with torch.no_grad():
            tk0 = tqdm(data_loader, total=len(data_loader), disable=use_tpu)
            for b_idx, data in enumerate(tk0):
                for key, value in data.items():
                    data[key] = value.to(device)
                predictions, _ = model(**data)
                predictions = predictions.cpu()
                final_predictions.append(predictions)
        return final_predictions

In [18]:
class AdaptiveConcatPool2d(nn.Module):
    """Adaptive max pooling and average pooling, concatenated along dim 1.

    ``sz`` is the output size handed to both pooling layers; a falsy value
    (the default ``None``) selects ``(1, 1)``.
    """

    def __init__(self, sz=None):
        super().__init__()
        output_size = sz if sz else (1, 1)
        self.ap = nn.AdaptiveAvgPool2d(output_size)
        self.mp = nn.AdaptiveMaxPool2d(output_size)

    def forward(self, x):
        max_pooled = self.mp(x)
        avg_pooled = self.ap(x)
        # Max-pooled channels come first, then the average-pooled ones.
        return torch.cat([max_pooled, avg_pooled], 1)

In [19]:
class LabelSmoothing(nn.Module):
    """Binary cross-entropy on logits with label-smoothed targets.

    Each target ``t`` is replaced by ``t * (1 - smoothing) + 0.5 * smoothing``
    before the loss is computed.
    """

    def __init__(self, smoothing = 0.05):
        super(LabelSmoothing, self).__init__()
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing

    def forward(self, x, target):
        logits = x.float()
        hard_targets = target.float()

        # Pull the hard targets towards 0.5 by the smoothing amount.
        soft_targets = hard_targets * (1 - self.smoothing) + 0.5 * self.smoothing
        bce = F.binary_cross_entropy_with_logits(logits, soft_targets.type_as(logits))

        return bce.mean()

In [20]:
# class Net(nn.Module):
#     def __init__(self, arch):
#         super(Net, self).__init__()
#         self.arch = arch
        
#         in_features  = arch._fc.in_features
#         self.arch._fc = nn.Linear(in_features=in_features, out_features=1, bias=True)
                                
#     def forward(self, image,targets):
#         """
#         No sigmoid in forward because we are going to use BCEWithLogitsLoss
#         Which applies sigmoid for us when calculating a loss
#         """
#         batch_size, _, _, _ = image.shape
#         x = image
# #         criterion = nn.BCEWithLogitsLoss()
#         criterion = LabelSmoothing()
#         ### https://github.com/clovaai/CutMix-PyTorch/blob/master/train.py
#         output = self.arch(x)
#         loss = criterion(output, targets.view(-1,1).float())
#         return output,loss

In [21]:
class Net(nn.Module):
    """Image backbone combined with a dense branch for tabular features.

    The backbone's ``_fc`` layer is replaced by a 256-unit linear layer whose
    output is layer-normalised. The tabular features are projected to 256 units,
    both branches are concatenated (512) and reduced to a single logit through
    two linear layers with dropout.

    Args:
        arch: Backbone module exposing a ``_fc`` linear layer (its
            ``in_features`` is read and the layer is replaced in place).
        n_tab_features: Width of the tabular feature vector. Defaults to 34.
    """

    def __init__(self, arch, n_tab_features=34):
        super(Net, self).__init__()
        self.arch = arch

        in_features  = arch._fc.in_features
        self.arch._fc = nn.Linear(in_features=in_features, out_features=256, bias=True)

        self.tabDense = nn.Linear(in_features=n_tab_features, out_features=256, bias=True)
        self.imgBn = nn.LayerNorm(256)
        self.fcout_1 = nn.Linear(512,128)
        self.fcout_2 = nn.Linear(128,1)
        self.dropout = nn.Dropout(0.1)

    def forward(self, image,targets,tabfeats):
        """Return ``(logits, loss)`` for one batch.

        No sigmoid is applied: the loss (``LabelSmoothing``, a binary
        cross-entropy on logits) applies it internally.

        Args:
            image: Batch of images fed to the backbone.
            targets: Binary labels, reshaped to ``(-1, 1)`` for the loss. May be
                ``None`` (e.g. at inference time), in which case no loss is
                computed.
            tabfeats: Batch of tabular feature vectors.

        Returns:
            ``(output, loss)`` with ``output`` the logits from the final linear
            layer and ``loss`` a scalar tensor, or ``None`` when ``targets`` is
            ``None``.
        """
        ### https://github.com/clovaai/CutMix-PyTorch/blob/master/train.py
        # LayerNorm normalises over the last dimension only, so it can be
        # applied to the backbone output directly.
        output_image = self.imgBn(self.arch(image))

        output_tabular = self.tabDense(tabfeats)
        x = torch.cat((output_image, output_tabular), dim=1)
        x = self.dropout(x)
        x = self.fcout_1(x)
        x = self.dropout(x)
        output = self.fcout_2(x)

        if targets is None:
            return output, None
        criterion = LabelSmoothing()
        loss = criterion(output, targets.view(-1,1).float())
        return output,loss

In [22]:
class Microscope(A.ImageOnlyTransform):
    """Zero every pixel outside a centred circle, like a microscope field of view.

    Args:
        p: Probability of applying the transform. The probability is handled by
            the albumentations base class, so ``apply`` does not draw a second
            random number (doing so would make the effective probability p**2).
        always_apply: Forwarded to the albumentations base class.
    """

    def __init__(self, p: float = 0.5, always_apply=False):
        # Keywords keep the call independent of the base class' argument order.
        super().__init__(always_apply=always_apply, p=p)

    def apply(self, img, **params):
        """Return ``img`` multiplied by a 0/1 mask that is 1 inside the circle."""
        height, width = img.shape[:2]
        radius = random.randint(height // 2 - 3, height // 2 + 15)

        # cv2 expects the centre as (x, y), i.e. (column, row).
        mask = np.zeros(img.shape, dtype=np.uint8)
        cv2.circle(mask, (width // 2, height // 2), radius, (1, 1, 1), -1)

        return np.multiply(img, mask)

In [23]:
import pretrainedmodels

def train(fold,bs,epochs,fp16,sz,arch='se_resnet152',debug=False,accumulation_steps=1):
    """Train and checkpoint one cross-validation fold of the image + tabular model.

    Rows of ``df_folds`` whose ``fold`` differs from ``fold`` are used for
    training and the remaining rows for validation. After each epoch the
    validation ROC AUC drives both the learning-rate scheduler and early
    stopping; a checkpoint is written under ``../models`` whenever it improves.

    Args:
        fold: Fold number held out for validation.
        bs: Training batch size; validation uses half of it (at least 1).
        epochs: Maximum number of epochs.
        fp16: Train with apex ``amp`` at opt level O1.
        sz: Side length images are resized to, or ``None`` to keep the stored size.
        arch: Model name handed to ``EfficientNet.from_pretrained``.
            NOTE(review): the default is not an ``efficientnet-*`` name like the
            one used by the run cell, so it presumably fails there — confirm.
        debug: Use only the first 250 training and validation images.
        accumulation_steps: Forwarded to ``Engine.train``.
    """
    import os

    if sz is not None:
        sz = (sz,sz)

    arch_name = arch
#     training_data_path = '../input/128X128/train/'
    training_data_path = '../input/384X384/train/'
    device = "cuda"
    train_bs = bs
    # Integer halving of a batch size of 1 would give 0, which DataLoader rejects.
    valid_bs = max(1, bs//2)

    df = df_folds.copy()
    df_train = df[df.fold != fold].reset_index(drop=True)
    df_valid = df[df.fold == fold].reset_index(drop=True)

    backbone = EfficientNet.from_pretrained(arch_name)
#     arch = pretrainedmodels.__dict__[arch](num_classes=1000, pretrained='imagenet')
    model = Net(arch=backbone)  # New model for each fold
    model = model.to(device)
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    train_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
            albumentations.CoarseDropout(),
            albumentations.RandomBrightness(0.3),
            albumentations.RandomContrast(0.3),
            albumentations.ChannelShuffle(),
            albumentations.Cutout(4,4,4),
            Microscope(),
            albumentations.ChannelDropout(p=0.1),
            albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=30),
            albumentations.Flip(p=0.5)
        ]
    )

    valid_aug = albumentations.Compose(
        [
            albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)
        ]
    )

    # In debug mode only the first 250 rows are used; a `None` limit keeps everything.
    limit = 250 if debug else None

    def _paths_and_targets(frame):
        # Build the image paths and the matching targets for one split.
        image_ids = frame.image_id.values.tolist()[:limit]
        paths = [os.path.join(training_data_path, i + ".jpg") for i in image_ids]
        return paths, frame.target.values[:limit]

    train_images, train_targets = _paths_and_targets(df_train)
    valid_images, valid_targets = _paths_and_targets(df_valid)

    train_dataset = ClassificationLoader(
        image_paths=train_images,
        targets=train_targets,
        resize=sz,
        tabularDF = df_train,
        cols = cols,
        augmentations=train_aug,
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=train_bs, shuffle=True, num_workers=4
    )

    valid_dataset = ClassificationLoader(
        image_paths=valid_images,
        targets=valid_targets,
        resize=sz,
        tabularDF = df_valid,
        cols = cols,
        augmentations=valid_aug)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_bs, shuffle=False, num_workers=4
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=2,
        threshold=0.001,
        mode="max"
    )
    if fp16:
        # Imported here so the function does not depend on a later notebook
        # cell having put `amp` into the global namespace.
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
    es = EarlyStopping(patience=3, mode="max")

    # Image size recorded in the checkpoint name (384 when images are not resized).
    ss = sz[0] if sz is not None else 384
    # torch.save does not create missing directories.
    os.makedirs("../models", exist_ok=True)

    for epoch in range(epochs):
        train_loss = Engine.train(train_loader, model, optimizer, device=device,fp16=fp16,accumulation_steps=accumulation_steps)
        predictions, valid_loss = Engine.evaluate(
            valid_loader, model, device=device
        )
        predictions = np.vstack((predictions)).ravel()
        auc = metrics.roc_auc_score(valid_targets, predictions)
        print(f"Epoch = {epoch}, AUC = {auc}")
        scheduler.step(auc)

        es(auc, model, model_path= "../models/model_tabData_arch_{}_sz_{}_fold_{}_epoch_{}_auc_{}.bin".format(arch_name,ss,fold,epoch,round(auc*100,2)))
        if es.early_stop:
            print("Early stopping")
            break

In [24]:
from apex import amp, optimizers

In [None]:
# Run configuration: maximum epochs, debug switch, batch size, gradient
# accumulation, backbone name and whether to train with apex fp16.
e = 25
debug= False
bs = 14
accumulation_steps = 1
mtype = 'efficientnet-b6'
from apex import amp, optimizers
apx = True

# Train one model per fold (folds 0-4), keeping the stored image size (sz=None).
for fold in range(5):
    train(fold,bs,e,apx,None,mtype,debug=debug,accumulation_steps=accumulation_steps)

Loaded pretrained weights for efficientnet-b6


  0%|          | 0/3335 [00:00<?, ?it/s]

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


  1%|▏         | 49/3335 [00:26<26:32,  2.06it/s, loss=0.324] 

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0


  2%|▏         | 52/3335 [00:27<25:42,  2.13it/s, loss=0.327]

Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


  4%|▎         | 123/3335 [01:04<27:39,  1.94it/s, loss=0.335]