In [1]:
import glob
import os
import time
import logging
import pandas as pd
from sklearn import model_selection
from PIL import Image
import torch
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import albumentations as aug
import efficientnet_pytorch
import random
import torchvision
from tqdm import tqdm
from torch.cuda import amp
import pretrainedmodels
from sklearn.metrics import accuracy_score
from importlib import reload  # Not needed in Python 2
from torch.nn.modules.loss import _WeightedLoss
import timm

import hashlib
import joblib
import Cassava
import Tracker

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Use the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Locations of the training CSV and of the directory holding the test images.
TRAIN_FILE = r'D:\Dataset\Cassava Competiton\train.csv'
TEST_FILE = r'D:\Dataset\Cassava Competiton\test_images' + '\\'  # trailing separator is part of the value

# Root directory under which model checkpoints are written.
HOME_PATH = r'D:\cassava_competition'

# Side length handed to the crop/resize transforms of the augmentation pipelines.
IMAGE_SIZE_AUG = 128

# Probability given to the optional augmentations.
p = 0.5

# Per-channel mean / std passed to aug.Normalize.
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

In [3]:
def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Covers Python's ``random``, the ``PYTHONHASHSEED`` environment variable,
    NumPy's global generator and torch (CPU and all CUDA devices), and pins
    cuDNN to deterministic behaviour.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run.
    torch.backends.cudnn.benchmark = False


seed_everything(seed=42)

In [4]:
def logger_init(path):
    """Create an INFO-level logger that writes to ``path``, truncating the file.

    The ``logging`` module is reloaded first, which resets its logger
    registry; the logger returned here is therefore a fresh object even
    though it is looked up under this module's ``__name__``.

    Args:
        path: log file to (over)write.

    Returns:
        The configured ``logging.Logger``.
    """
    from importlib import reload  # Not needed in Python 2
    reload(logging)

    # Records are written as "asctime,name,message".
    handler = logging.FileHandler(path, mode='w')
    handler.setLevel(logging.INFO)
    handler.setFormatter(logging.Formatter('%(asctime)s,%(name)s,%(message)s'))

    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)
    log.addHandler(handler)
    return log

In [5]:
def logger_init_all(path):
    """Create an INFO-level logger that appends to ``path``.

    Same set-up as ``logger_init`` except that the file is opened in append
    mode, so earlier content is kept. ``logging`` is reloaded first, which
    resets its logger registry and yields a fresh logger object.

    Args:
        path: log file to append to.

    Returns:
        The configured ``logging.Logger``.
    """
    reload(logging)

    # Records are written as "asctime,name,message".
    handler = logging.FileHandler(path, mode='a')
    handler.setLevel(logging.INFO)
    handler.setFormatter(logging.Formatter('%(asctime)s,%(name)s,%(message)s'))

    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)
    log.addHandler(handler)
    return log

In [22]:
# Augmentation pipeline for training images. Every optional transform fires
# with probability `p`; brightness/contrast jitter and normalisation always run.
# NOTE(review): the two positional arguments of RandomResizedCrop / CenterCrop /
# Resize are presumably (height, width) - confirm against the installed
# albumentations version.
train_aug = aug.Compose(
        [     
        aug.RandomResizedCrop(IMAGE_SIZE_AUG, IMAGE_SIZE_AUG),
        aug.Transpose(p=p),
        aug.HorizontalFlip(p=p),
        aug.VerticalFlip(p=p),
        aug.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, brightness_by_max=True, always_apply=True, p=1),
        aug.ShiftScaleRotate(p=p),
        # Normalise with the module-level `mean` / `std`.
        aug.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),  
        ]
    )

# Pipeline for validation images: no random transforms, only a centre crop,
# a resize and the same normalisation as training.
# NOTE(review): a centre crop followed by a resize to the same size only makes
# sense if the dataset already resizes images beforehand - verify in Cassava.
valid_aug = aug.Compose(
       [    
        aug.CenterCrop(IMAGE_SIZE_AUG, IMAGE_SIZE_AUG, p=1.0),
        aug.Resize(IMAGE_SIZE_AUG, IMAGE_SIZE_AUG),
        aug.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),  
       ]
    )


In [23]:
# Experiment configuration shared by the training cells and handed to Tracker.Init.
hyper_parameter = dict(
    EPOCHS=20,                  # maximum number of epochs per fold
    BATCH_SIZE=16,              # batch size of every DataLoader
    LEARNING_RATE=1e-3,         # learning rate given to the optimiser
    WEIGHT_DECAY=1e-5,          # weight-decay value for this experiment (written to the run logs)
    NFOLDS=5,                   # number of cross-validation folds
    EARLY_STOPPING_STEPS=5,     # non-improving epochs tolerated when EARLY_STOP is on
    EARLY_STOP=False,           # switches the early-stopping branch of run_training on/off
    IMAGE_SIZE=IMAGE_SIZE_AUG,  # side length used to build the dataset `resize` tuple
    ACCU_STEPS=1,               # passed to train_fn as accumulation_steps
    train_aug=train_aug,
    valid_aug=valid_aug,
    SMOOTHING=0.05,             # default label-smoothing factor of SmoothCrossEntropyLoss
)


In [24]:
# Training labels, read from the CSV at TRAIN_FILE.
train_df = pd.read_csv(TRAIN_FILE)

# List the test directory exactly once so that the full paths and the bare
# file names are guaranteed to be parallel lists. glob returns entries in
# arbitrary order, hence the sort. os.path.join replaces the previous '\*'
# suffix, which is an invalid escape sequence in a Python string literal.
test_images = sorted(glob.glob(os.path.join(TEST_FILE, '*')))
test_images_sub = [os.path.basename(image_path) for image_path in test_images]

In [25]:
if __name__ == "__main__":
    # `input_path` is also read, as a global, by run_training to find train_folds.csv.
    input_path='D:\\Dataset\\Cassava Competiton\\'
    train_df["fold"] = -1
    # Seed the shuffle explicitly so that re-running this cell reproduces the
    # same fold assignment instead of depending on the global NumPy RNG state.
    train_df = train_df.sample(frac=1, random_state=42).reset_index(drop=True)
    y = train_df.label.values
    # Keep the number of splits in step with the configured fold count.
    skf = model_selection.StratifiedKFold(n_splits=hyper_parameter['NFOLDS'],shuffle=True,random_state=42)
    # Each row is tagged with the index of the fold in which it is held out.
    for fold,(idxT,idxV) in enumerate(skf.split(X=train_df, y=y)):
        train_df.loc[idxV, "fold"] = fold
    train_df.to_csv(os.path.join(input_path, "train_folds.csv"), index=False)

In [26]:
class CassavaModel(nn.Module):
    """Pretrained 'efficientnet-b4' feature extractor followed by dropout and a 5-way linear layer."""

    def __init__(self):
        super().__init__()
        self.features = efficientnet_pytorch.EfficientNet.from_pretrained('efficientnet-b4')
        self.dropout = nn.Dropout(p=0.2)
        self.out = nn.Linear(in_features=1792, out_features=5)

    def forward(self, image, targets=None):
        """Return the raw class scores for a 4-D image batch; ``targets`` is accepted but not used."""
        n_samples, _, _, _ = image.shape
        feature_maps = self.features.extract_features(image)
        # Global average pooling, then flatten to (n_samples, features).
        pooled = F.adaptive_avg_pool2d(feature_maps, 1)
        flat = pooled.reshape(n_samples, -1)
        return self.out(self.dropout(flat))


In [27]:
class SEResNext50_32x4d(nn.Module):
    """``se_resnext50_32x4d`` backbone from ``pretrainedmodels`` with a 5-way linear head."""

    def __init__(self, pretrained="imagenet"):
        super().__init__()
        build_backbone = getattr(pretrainedmodels, "se_resnext50_32x4d")
        self.model = build_backbone(pretrained=pretrained)
        self.out = nn.Linear(2048, 5)

    def forward(self, image, targets=None):
        """Return the raw class scores for a 4-D image batch; ``targets`` is accepted but not used."""
        n_samples, _, _, _ = image.shape
        feature_maps = self.model.features(image)
        # Global average pooling, then flatten to (n_samples, features).
        pooled = F.adaptive_avg_pool2d(feature_maps, 1).reshape(n_samples, -1)
        return self.out(pooled)

In [28]:
class densenet121(nn.Module):
    """``densenet121`` backbone from ``pretrainedmodels`` with a 5-way linear head."""

    def __init__(self, pretrained="imagenet"):
        super().__init__()
        build_backbone = getattr(pretrainedmodels, "densenet121")
        self.model = build_backbone(pretrained=pretrained)
        self.out = nn.Linear(1024, 5)

    def forward(self, image, targets=None):
        """Return the raw class scores for a 4-D image batch; ``targets`` is accepted but not used."""
        n_samples, _, _, _ = image.shape
        feature_maps = self.model.features(image)
        # Global average pooling, then flatten to (n_samples, features).
        pooled = F.adaptive_avg_pool2d(feature_maps, 1).reshape(n_samples, -1)
        return self.out(pooled)

In [29]:
class EfficientNet(nn.Module):
    """timm ``tf_efficientnet_b4_ns`` backbone followed by dropout and a 5-way linear head."""

    def __init__(self):
        super().__init__()
        self.base_model = timm.create_model("tf_efficientnet_b4_ns", pretrained=True)
        self.dropout = nn.Dropout(p=0.2)
        self.out = nn.Linear(in_features=1792, out_features=5, bias=True)

    def forward(self, image, targets=None):
        """Return the raw class scores for a 4-D image batch; ``targets`` is accepted but not used."""
        n_samples, _, _, _ = image.shape
        feature_maps = self.base_model.forward_features(image)
        # Global average pooling, then flatten to (n_samples, features).
        pooled = F.adaptive_avg_pool2d(feature_maps, 1)
        flat = pooled.reshape(n_samples, -1)
        return self.out(self.dropout(flat))

In [30]:
import torch.nn.functional as F


def linear_combination(x, y, epsilon):
    """Blend two values: ``epsilon`` weights ``x`` and ``1 - epsilon`` weights ``y``."""
    return epsilon*x + (1-epsilon)*y


def reduce_loss(loss, reduction='mean'):
    """Reduce ``loss`` with 'mean' or 'sum'; any other value returns it unchanged."""
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss


class LabelSmoothingCrossEntropy(nn.Module):
    """Cross-entropy blended with a uniform-over-classes term.

    Args:
        epsilon: weight of the uniform term (the NLL term gets ``1 - epsilon``).
        reduction: 'mean', 'sum', or anything else for no reduction.
    """

    def __init__(self, epsilon:float=0.01, reduction='mean'):
        super().__init__()
        self.epsilon = epsilon
        self.reduction = reduction

    def forward(self, preds, target):
        """Compute the smoothed loss from raw scores ``preds`` and class indices ``target``."""
        num_classes = preds.size()[-1]
        log_probs = F.log_softmax(preds, dim=-1)
        # Negative log-probability summed over all classes, i.e. the uniform-target term.
        uniform_term = reduce_loss(-log_probs.sum(dim=-1), self.reduction)
        # Ordinary negative log-likelihood of the labelled class.
        nll_term = F.nll_loss(log_probs, target, reduction=self.reduction)
        return linear_combination(uniform_term/num_classes, nll_term, self.epsilon)

In [31]:
class SmoothCrossEntropyLoss(_WeightedLoss):
    """Cross-entropy against label-smoothed one-hot targets.

    Args:
        weight: optional per-class weights multiplied into the log-probabilities.
        reduction: 'mean', 'sum', or anything else for per-sample losses.
        smoothing: probability mass moved away from the true class; the
            default is read from ``hyper_parameter['SMOOTHING']`` when the
            class is defined.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=hyper_parameter['SMOOTHING']):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    @staticmethod
    def _smooth_one_hot(targets:torch.Tensor, n_classes:int, smoothing=hyper_parameter['SMOOTHING']):
        """Build soft targets: ``1 - smoothing`` on the true class, the rest shared evenly."""
        assert 0 <= smoothing < 1
        with torch.no_grad():
            off_value = smoothing /(n_classes-1)
            soft = torch.empty(size=(targets.size(0), n_classes), device=targets.device)
            soft.fill_(off_value)
            soft.scatter_(1, targets.data.unsqueeze(1), 1.-smoothing)
        return soft

    def forward(self, inputs, targets):
        """Return the loss for raw scores ``inputs`` and integer class ``targets``."""
        soft_targets = SmoothCrossEntropyLoss._smooth_one_hot(
            targets, inputs.size(-1), self.smoothing
        )
        log_probs = F.log_softmax(inputs, -1)

        if self.weight is not None:
            log_probs = log_probs * self.weight.unsqueeze(0)

        per_sample = -(soft_targets * log_probs).sum(-1)

        if self.reduction == 'sum':
            return per_sample.sum()
        if self.reduction == 'mean':
            return per_sample.mean()
        return per_sample

In [32]:
class AverageMeter:
    """Tracks the latest value and the running weighted average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set the latest value, average, weighted sum and count back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [33]:
def train_fn(model, optimizer, scheduler, loss_fn, dataloader, device, accumulation_steps=1):
    """Train ``model`` for one pass over ``dataloader`` with mixed precision.

    Gradients are accumulated over ``accumulation_steps`` consecutive batches
    before each optimiser step. The scheduler is stepped once per optimiser
    step (not once per epoch). Batches left over at the end that do not fill
    a whole accumulation group do not trigger a step.

    Args:
        model: network to train; switched to training mode here.
        optimizer: optimiser updating ``model``'s parameters.
        scheduler: learning-rate scheduler, stepped after every optimiser step.
        loss_fn: callable ``(outputs, targets) -> scalar loss``.
        dataloader: yields dicts with an ``'image'`` and a ``'targets'`` entry.
        device: device the batches are moved to.
        accumulation_steps: number of batches per optimiser step.

    Returns:
        The sample-weighted average (unscaled) loss over the pass.
    """
    model.train()
    losses = AverageMeter()
    scaler = amp.GradScaler()

    # Start from clean gradients. Previously they were either wiped before
    # every batch (accumulation > 1, so nothing accumulated) or left in place
    # after the very first step (accumulation == 1, so batch 0 leaked into batch 1).
    optimizer.zero_grad()

    tk0 = tqdm(dataloader, total = len(dataloader))
    for b_idx, data in enumerate(tk0):
        inputs, targets = data['image'].to(device), data['targets'].to(device)
        with amp.autocast():
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)

        # Scale the loss down so the accumulated gradient is the mean over the
        # group rather than the sum (no effect when accumulation_steps == 1).
        scaler.scale(loss / accumulation_steps).backward()

        if (b_idx + 1) % accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()
            optimizer.zero_grad()

        # Weight by the real number of samples; the last batch may be smaller.
        losses.update(loss.item(), inputs.size(0))
        tk0.set_postfix(loss=losses.avg)
    tk0.close()
    return losses.avg


def valid_fn(model, loss_fn, dataloader, device):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Args:
        model: network to evaluate; switched to eval mode here.
        loss_fn: callable ``(outputs, targets) -> scalar loss``.
        dataloader: yields dicts with an ``'image'`` and a ``'targets'`` entry.
        device: device the batches are moved to.

    Returns:
        ``(average_loss, predictions)`` where ``predictions`` is a NumPy array
        of per-class probabilities, one row per sample in loader order.
    """
    model.eval()
    losses = AverageMeter()
    valid_preds = []
    with torch.no_grad():
        tk0 = tqdm(dataloader, total = len(dataloader))
        for b_idx, data in enumerate(tk0):
            inputs, targets = data['image'].to(device), data['targets'].to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)
            # The classes are mutually exclusive and the loss is a softmax
            # cross-entropy, so softmax (not sigmoid) gives the probabilities.
            # The per-row argmax is the same either way.
            valid_preds.append(outputs.softmax(dim=-1).detach().cpu().numpy())
            # Weight by the real number of samples; the last batch may be smaller.
            losses.update(loss.item(), inputs.size(0))
            tk0.set_postfix(loss=losses.avg)
        valid_preds = np.concatenate(valid_preds)
        tk0.close()
    return losses.avg, valid_preds


def inference_fn(model, dataloader, device):
    """Predict class probabilities for every batch of ``dataloader``.

    Args:
        model: network used for prediction; switched to eval mode here.
        dataloader: yields dicts with an ``'image'`` entry.
        device: device the batches are moved to.

    Returns:
        NumPy array of per-class probabilities, one row per sample in loader order.
    """
    model.eval()
    preds = []
    with torch.no_grad():
        tk0 = tqdm(dataloader, total=len(dataloader))
        for data in tk0:
            inputs = data['image'].to(device)
            outputs = model(inputs)
            # Mutually exclusive classes: softmax yields a proper distribution
            # per sample (sigmoid does not). The per-row argmax is unchanged.
            preds.append(outputs.softmax(dim=-1).detach().cpu().numpy())
        preds = np.concatenate(preds)
    return preds

In [34]:
def run_training(fold, seed):
    """Train on every fold except ``fold``, validate on ``fold``, then predict the test set.

    Relies on module-level state created by other cells: ``input_path``,
    ``hyper_parameter``, ``train_aug`` / ``valid_aug``, ``test_images``,
    ``cassava_init``, ``logger`` and ``logger_all``.

    Args:
        fold: index of the fold held out for validation.
        seed: seed handed to ``seed_everything``; also part of the checkpoint name.

    Returns:
        ``(oof, predictions)``. ``oof`` has one row per row of train_folds.csv,
        filled only at the validation rows with the predictions of the epoch
        with the lowest validation loss. ``predictions`` holds the test-set
        predictions of that same checkpoint.
    """
    training_data_path = 'D:\\Dataset\\Cassava Competiton\\train_images\\'
    model_path = f"{HOME_PATH}\\{cassava_init.exper_dict}\\artefacts\\model\\{fold}_{seed}.pth"

    seed_everything(seed)

    resize = (hyper_parameter['IMAGE_SIZE'],hyper_parameter['IMAGE_SIZE'])

    train = pd.read_csv(input_path+'train_folds.csv')

    # Row positions of the validation fold inside the full table (used to fill oof).
    val_idx = train[train['fold'] == fold].index

    train_df = train[train['fold'] != fold].reset_index(drop=True)
    valid_df = train[train['fold'] == fold].reset_index(drop=True)

    train_images = [os.path.join(training_data_path, i) for i in train_df.image_id.values.tolist()]
    train_targets = train_df.label.values

    valid_images = [os.path.join(training_data_path, i) for i in valid_df.image_id.values.tolist()]
    valid_targets = valid_df.label.values

    train_dataset = Cassava.Cassava_Train_DS(train_images, train_targets, resize = resize, augmentations = train_aug)
    valid_dataset = Cassava.Cassava_Train_DS(valid_images, valid_targets, resize = resize, augmentations = valid_aug)
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=hyper_parameter['BATCH_SIZE'], shuffle=True, num_workers=4)
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=hyper_parameter['BATCH_SIZE'], shuffle=False, num_workers=4)

    model = EfficientNet()
    model.to(DEVICE)

    # WEIGHT_DECAY is part of the logged configuration, so actually apply it.
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=hyper_parameter['LEARNING_RATE'],
        weight_decay=hyper_parameter['WEIGHT_DECAY'],
    )
    # train_fn steps the scheduler once per optimiser step, so T_0 is counted
    # in steps. Express the 10-epoch cosine cycle in that unit; a bare T_0=10
    # restarted the learning rate every 10 batches.
    steps_per_epoch = max(1, len(trainloader) // hyper_parameter['ACCU_STEPS'])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=10 * steps_per_epoch, T_mult=1, eta_min=1e-6, last_epoch=-1
    )
    loss_fn = SmoothCrossEntropyLoss()

    early_stopping_steps = hyper_parameter['EARLY_STOPPING_STEPS']
    early_step = 0

    oof = np.zeros((len(train), 5))
    best_loss = np.inf

    for epoch in range(hyper_parameter['EPOCHS']):

        train_loss = train_fn(model, optimizer,scheduler, loss_fn, trainloader, DEVICE, accumulation_steps=hyper_parameter['ACCU_STEPS'])
        print(f"FOLD: {fold}, EPOCH: {epoch}, train_loss: {train_loss}")
        valid_loss, valid_preds = valid_fn(model, loss_fn, validloader, DEVICE)
        print(f"FOLD: {fold}, EPOCH: {epoch}, valid_loss: {valid_loss}")
        score = accuracy_score(valid_targets, valid_preds.argmax(1))
        print(f"fold: {fold} Accuracy: {score}")

        stop_now = False
        if valid_loss < best_loss:
            # New best epoch: keep its out-of-fold predictions and checkpoint.
            best_loss = valid_loss
            oof[val_idx] = valid_preds
            torch.save(model.state_dict(), model_path)
        elif hyper_parameter['EARLY_STOP'] == True:
            # The counter is cumulative: it is not reset when the loss improves again.
            early_step += 1
            stop_now = early_step >= early_stopping_steps

        # One CSV record per epoch, identical in both logs. The per-experiment
        # log used to miss the comma between the experiment and fold columns.
        record = f"{cassava_init.exper_dict},{fold},{epoch},{train_loss},{valid_loss},{best_loss},{score},{hyper_parameter['EPOCHS']},{hyper_parameter['BATCH_SIZE']},{hyper_parameter['LEARNING_RATE']},{hyper_parameter['WEIGHT_DECAY']},{hyper_parameter['NFOLDS']},{hyper_parameter['EARLY_STOPPING_STEPS']},{hyper_parameter['EARLY_STOP']}, {hyper_parameter['IMAGE_SIZE']},{hyper_parameter['ACCU_STEPS']}"
        logger.info(record)
        logger_all.info(record)

        # Stop only after logging so the final epoch is not lost from the logs.
        if stop_now:
            break

    #--------------------- PREDICTION---------------------
    # Use the deterministic validation pipeline at test time; the random
    # training augmentations would make the predictions non-reproducible.
    testdataset = Cassava.Cassava_Test_DS(test_images, resize = resize , augmentations = valid_aug)
    testloader = torch.utils.data.DataLoader(testdataset, batch_size=hyper_parameter['BATCH_SIZE'], shuffle=False, num_workers=4)

    # Reload the best checkpoint of this fold into a fresh model.
    model = EfficientNet()
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    model.to(DEVICE)

    predictions = inference_fn(model, testloader, DEVICE)

    return oof, predictions



In [35]:
def run_k_fold(NFOLDS, seed):
    """Run ``run_training`` for folds ``0 .. NFOLDS-1`` and combine the results.

    Args:
        NFOLDS: number of folds to train; should match the fold count stored
            in train_folds.csv.
        seed: seed forwarded to every ``run_training`` call.

    Returns:
        ``(oof, predictions)``: the summed out-of-fold predictions (each call
        fills only its own validation rows) and the test predictions averaged
        over the folds.
    """
    oof = np.zeros((len(train_df), 5))
    predictions = np.zeros((len(test_images), 5))

    # Honour the NFOLDS argument; it used to be ignored in favour of the
    # global hyper_parameter['NFOLDS'].
    for fold in range(NFOLDS):
        oof_, pred_ = run_training(fold, seed)

        predictions += pred_ / NFOLDS
        oof += oof_

    return oof, predictions

In [None]:
# Averaging on multiple SEEDS
SEED = [42,] #<-- Update
oof = np.zeros((len(train_df), 5))
predictions = np.zeros((len(test_images), 5))

# Close handlers left over from a previous run before new loggers are created.
logging.shutdown()
# NOTE(review): Tracker.Init presumably creates the experiment directory tree
# and exposes .path / .parent_dir / .project_name / .exper_dict - confirm in Tracker.
cassava_init = Tracker.Init(hyper_parameter, project_name='cassava_competition', parent_dir='D:\\' , is_sub=True, sub_directory=['model','config','logger'])

# Per-experiment log (truncated on every run) and project-wide log (appended to).
logger = logger_init(cassava_init.path+'\\logger\\logging.log')
all_runs_log_path = os.path.join(cassava_init.parent_dir, cassava_init.project_name)+'\\logging.log'
logger_all = logger_init_all(all_runs_log_path)

# Column header shared by both CSV-style logs.
LOG_HEADER = 'experiment,fold,epoch,train_loss,valid_loss,best_loss,accuracy,epochs,batch_size,learning_rate,weight_decay,nfolds,early_stopping_steps,early_stop,image_size,accu_steps'
logger.info(LOG_HEADER)

# Write the header to the project-wide log only while it is still empty.
# The check uses the same path the logger was opened with instead of a
# hard-coded copy, so the two cannot drift apart.
if os.path.getsize(all_runs_log_path)==0:
    logger_all.info(LOG_HEADER)

# Only fold 0 with seed 42 is trained for now; the seed-averaging loop is disabled.
##for seed in SEED:

oof_, predictions_ = run_training(0, 42)
#oof += oof_ / len(SEED)
#predictions += predictions_ / len(SEED)

 92%|█████████████████████████████████████████████████████████████▌     | 983/1070 [07:57<01:11,  1.21it/s, loss=0.994]