## Prepare Lib

In [1]:
import os
print(os.listdir("../input"))
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

['images', 'test.csv', 'sample_submission.csv', 'train.csv']


In [2]:
import cv2
import matplotlib.pyplot as plt
from os.path import isfile
import torch.nn.init as init
import torch
import torch.nn as nn
import numpy as np
import pandas as pd 
from PIL import Image, ImageFilter
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder, OneHotEncoder
from torch.utils.data import Dataset
from torchvision import transforms
from torch.optim import Adam, SGD, RMSprop
import time
import math
from torch.nn.parameter import Parameter
from torch.autograd import Variable
import torch.functional as F
from tqdm import tqdm
from sklearn import metrics
import urllib
import pickle
import cv2
import torch.nn.functional as F
from torchvision import models
import seaborn as sns
import random
import sys
import shutil
import albumentations
from albumentations import pytorch as AT

from apex import amp
from efficientnet_pytorch import EfficientNet
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

from torchcontrib.optim import SWA

# torch.backends.cudnn.benchmark = True

In [3]:
import scipy.special

SEED = 42
base_dir = '../input'
def seed_everything(seed=SEED):
    random.seed(seed)
    os.environ['PYHTONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
seed_everything(SEED)

def l2_norm(input,axis=1):
    norm = torch.norm(input,2,axis,True)
    output = torch.div(input, norm)
    return output

sigmoid = lambda x: scipy.special.expit(x)

In [4]:
# visualize tools
def visualize(**images):
    """PLot images in one row."""
    n = len(images)
    plt.figure(figsize=(16, 5))
    for i, (name, image) in enumerate(images.items()):
        plt.subplot(1, n, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.title(' '.join(name.split('_')).title())
        plt.imshow(image)
    plt.show()
    
def test_transform(img_path, transform):
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = transform(image = img)['image']
    visualize(image = img)
    
def write_aug(img_path, transform, num=30):
    img = cv2.imread(img_path)
    for i in range(num):
        t = transform(image = img)['image']
        cv2.imwrite('./aug/'+str(i)+'.jpg',t)

## Param

In [5]:
FOLD = 5
BATCH_SIZE = 32  ## batch size * accumulate ~= 64 (64x1, 32x2, 24x3, 16*4)
ACCUMULATE = 2
LR = 1e-3
EPOCH = 20
IMG_SIZE = 384  

In [6]:
EXP = 1
while os.path.exists('./exp/exp%d'%EXP):
    EXP+=1
os.makedirs('./exp/exp%d'%EXP)

## Prepare Data

In [7]:
train_df = pd.read_csv( base_dir + '/train.csv')

In [8]:
train_df.head()

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Train_0,0,0,0,1
1,Train_1,0,1,0,0
2,Train_2,1,0,0,0
3,Train_3,0,0,1,0
4,Train_4,1,0,0,0


In [9]:
class PlantDataset(Dataset):
    
    def __init__(self, dataframe, transform=None):
        self.df = dataframe
        self.transform = transform
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):

        row = self.df.iloc[idx]
        label = np.argmax([row.healthy, row.multiple_diseases, row.rust, row.scab])
#         if label[1] == 1: # fix anno bug in label (should post process to recover raw label)
#             label[2] = 1
#             label[3] = 1
#         label = np.array(label)

        path = base_dir + '/images/' + row.image_id + '.jpg'
        image = cv2.imread(path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            image = self.transform(image=image)
            
        image = image['image']
            
        return image, label

In [10]:
def pre_trans(x, cols, rows):
    return (x * 2.0 - 1.0)

train_transform_advprop = albumentations.Compose([
    albumentations.Resize(IMG_SIZE, IMG_SIZE),
    albumentations.RandomRotate90(p=0.5),
    albumentations.Transpose(p=0.5),
    albumentations.Flip(p=0.5),
    albumentations.OneOf([
        albumentations.RandomBrightness(0.15, p=1), 
        albumentations.RandomContrast(0.15, p=1),
        albumentations.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, p=1),
    ], p=0.5), 
    albumentations.OneOf([
        albumentations.ISONoise(color_shift=(0.01, 0.03), intensity=(0.1, 0.3)),
        albumentations.IAASharpen(alpha=(0.1, 0.3), lightness=(0.5, 1.0)),
    ], p=0.5), 
    albumentations.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=45, border_mode=1, p=0.5),
    albumentations.Lambda(image = pre_trans),
    AT.ToTensor(),
    ])


test_transform = albumentations.Compose([
    albumentations.Resize(IMG_SIZE, IMG_SIZE),
    albumentations.Lambda(image = pre_trans),
    AT.ToTensor(),
    ])

  "Using lambda is incompatible with multiprocessing. "
  "Using lambda is incompatible with multiprocessing. "


In [11]:
sfolder = StratifiedKFold(n_splits=FOLD,random_state=SEED,shuffle=True)

tr_idx = []
val_idx = []

Y = np.array(train_df[['healthy','multiple_diseases','rust','scab']])
Y = np.argmax(Y, axis=1)

for train, val in sfolder.split(range(len(train_df)), Y):
    tr_idx.append(train)
    val_idx.append(val)
    print('Train: %s | test: %s' % (len(train), len(val)))

Train: 1454 | test: 367
Train: 1456 | test: 365
Train: 1458 | test: 363
Train: 1458 | test: 363
Train: 1458 | test: 363


## Modeling

In [12]:
from utils.ranger import RangerVA 
from utils.lr_scheduler import CosineAnnealingWarmUpRestarts
from utils.label_smooth import LSR

In [13]:
class smooth_L1_ohem(nn.Module):
    def __init__(self, top_k=0.5):
        super(smooth_L1_ohem, self).__init__()
        self.top_k = top_k
        self.loss = nn.SmoothL1Loss(reduction='none')

    def forward(self, input, target):
        loss = self.loss(input, target)
        if self.top_k == 1:
            return torch.mean(loss)
        else:
            valid_loss, idxs = torch.topk(loss, int(self.top_k * loss.size()[0]), dim=0)    
            return torch.mean(valid_loss)

In [14]:
class AdaptiveConcatPool2d(nn.Module):
    def __init__(self, sz=None):
        super().__init__()
        sz = sz or (1,1)
        self.ap = nn.AdaptiveAvgPool2d(sz)
        self.mp = nn.AdaptiveMaxPool2d(sz)
    def forward(self, x):
        return torch.cat([self.mp(x), self.ap(x)], 1)
    
def mish(input):
    return input * torch.tanh(F.softplus(input))
       
class Mish(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, input):
        return mish(input)

def gem(x, p=3, eps=1e-6):
    return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)

class GeM(nn.Module):
    def __init__(self, p=3, eps=1e-6):
        super(GeM,self).__init__()
        self.p = Parameter(torch.ones(1)*p)
        self.eps = eps
    def forward(self, x):
        return gem(x, p=self.p, eps=self.eps)       
    def __repr__(self):
        return self.__class__.__name__ + '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + ', ' + 'eps=' + str(self.eps) + ')'

In [15]:
def train_model(epoch):
    model_conv.train()         
    avg_loss = 0.
    optimizer.zero_grad()
    for idx, (imgs, labels) in enumerate(train_loader):
        imgs_train, labels_train = imgs.cuda(), labels.cuda()
        output_train = model_conv(imgs_train)
        loss = criterion(output_train,labels_train)
        #loss.backward()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        if ((idx+1)%ACCUMULATE==0):
            torch.nn.utils.clip_grad_norm_(model_conv.parameters(), max_norm=5.0, norm_type=2)
            optimizer.step()
            optimizer.zero_grad()
            scheduler.step()
        avg_loss += loss.item() / len(train_loader)  
    return avg_loss

def test_model():    
    avg_val_loss = 0.
    model_conv.eval()
    y_pred_val = np.zeros((len(valset), 4))
    y_true_val = np.zeros((len(valset)))
    with torch.no_grad():
        for idx, (imgs, labels) in enumerate(val_loader):
            imgs_vaild, labels_vaild = imgs.cuda(), labels.cuda()
            output_test = model_conv(imgs_vaild)
            avg_val_loss += (criterion_test(output_test, labels_vaild).item() / len(val_loader)) 
            a = labels_vaild.detach().cpu().numpy().astype(np.int) #.reshape(-1,4)
            b = output_test.detach().cpu().numpy() #.reshape(-1,4)

            y_pred_val[idx*BATCH_SIZE:idx*BATCH_SIZE+b.shape[0]] = b
            y_true_val[idx*BATCH_SIZE:idx*BATCH_SIZE+b.shape[0]] = a
            
    acc = sum(np.argmax(y_pred_val, axis=1) == y_true_val) / len(y_pred_val)

    return avg_val_loss, acc

In [16]:
def train(fold):
    best_avg_loss = 100.0 
    best_acc = 0.0

    ### training
    for epoch in range(EPOCH):   
        print('lr:', scheduler.get_lr()[0]) 
        start_time        = time.time()
        avg_loss          = train_model(epoch)
        avg_val_loss, acc = test_model()
        elapsed_time      = time.time() - start_time 
        print('Epoch {}/{} \t loss={:.4f} \t val_loss={:.4f} \t val_acc={:.4f} \t time={:.2f}s'.format(
            epoch + 1, EPOCH, avg_loss, avg_val_loss, acc, elapsed_time))

        if avg_val_loss < best_avg_loss:
            best_avg_loss = avg_val_loss
            
        if acc > best_acc:
            best_acc = acc
            torch.save(model_conv.state_dict(), './exp/exp' + str(EXP) + '/efficientnet-b5-best' + str(fold) + '.pth')
            print('model saved!')

        print('=================================')   

    print('best loss:', best_avg_loss, 'best accuracy:', best_acc)
    
    return best_avg_loss, best_acc

In [17]:
log = open('./exp/exp' + str(EXP) +'/log.txt', 'w')
log.write('IMG_SIZE%d\n'%IMG_SIZE)
log.write('SEED%d\n'%SEED)
cv_losses = []
cv_metrics = []

for fold in range(FOLD):
    print('\n ********** Fold %d **********\n'%fold)
    ###################### Dataset #######################
    trainset     = PlantDataset(train_df.iloc[tr_idx[fold]].reset_index(), transform =train_transform_advprop)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

    valset       = PlantDataset(train_df.iloc[val_idx[fold]].reset_index(), transform   =test_transform)
    val_loader   = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

    ####################### Model ########################
    model_conv = EfficientNet.from_pretrained("efficientnet-b5", advprop=True)
    model_conv._dropout = nn.Dropout(p=0.5)
    model_conv._avg_pooling = AdaptiveConcatPool2d()
    model_conv._fc = nn.Sequential(nn.Linear(2048*2,256), Mish(), nn.Dropout(p=0.5), nn.Linear(256,4))
    model_conv.cuda()

    ###################### Optim ########################
    optimizer = torch.optim.AdamW(model_conv.parameters(), lr=LR/25., weight_decay=1e-4)

    criterion = LSR()
    criterion_test = nn.CrossEntropyLoss()

    T = len(train_loader)//ACCUMULATE * 20 # cycle
    scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=T, T_mult=1, eta_max=LR, T_up=T//20, gamma=0.2)
    scheduler.step()

    model_conv, optimizer = amp.initialize(model_conv, optimizer, opt_level="O1",verbosity=0)
    
    val_loss, val_acc = train(fold)
    
    cv_losses.append(val_loss)
    cv_metrics.append(val_acc)
    log.write('[Flod%d] val loss:%.5f \t val acc:%.5f; \n'%(fold, val_loss, val_acc))

cv_loss = sum(cv_losses)/FOLD   
cv_acc = sum(cv_metrics)/FOLD   
print('CV loss:%.6f \t CV accuracy:%.6f'%(cv_loss, cv_acc))
log.write('CV loss:%.6f \t CV accuracy:%.6f'%(cv_loss, cv_acc))


 ********** Fold 0 **********

Loaded pretrained weights for efficientnet-b5
lr: 5e-05
Epoch 1/30 	 loss=1.2454 	 val_loss=1.0951 	 val_acc=0.7139 	 time=62.47s
model saved!
lr: 0.001
Epoch 2/30 	 loss=0.7223 	 val_loss=0.8585 	 val_acc=0.7520 	 time=62.71s
model saved!
lr: 0.0009713539948733066
Epoch 3/30 	 loss=0.6536 	 val_loss=0.2752 	 val_acc=0.9210 	 time=62.98s
model saved!
lr: 0.0008888711104815146
Epoch 4/30 	 loss=0.5591 	 val_loss=0.2791 	 val_acc=0.9292 	 time=63.56s
model saved!
lr: 0.0007625
Epoch 5/30 	 loss=0.5167 	 val_loss=0.2183 	 val_acc=0.9264 	 time=63.52s
lr: 0.000607482884391792
Epoch 6/30 	 loss=0.5097 	 val_loss=0.1898 	 val_acc=0.9591 	 time=62.98s
model saved!
lr: 0.00044251711560820814
Epoch 7/30 	 loss=0.4713 	 val_loss=0.1639 	 val_acc=0.9537 	 time=63.26s
lr: 0.0002875000000000001
Epoch 8/30 	 loss=0.4354 	 val_loss=0.1702 	 val_acc=0.9673 	 time=63.51s
model saved!
lr: 0.0001611288895184855
Epoch 9/30 	 loss=0.4254 	 val_loss=0.1528 	 val_acc=0.9755 	 

Epoch 30/30 	 loss=0.3740 	 val_loss=0.1732 	 val_acc=0.9671 	 time=64.58s
best loss: 0.14642868336776027 best accuracy: 0.9726027397260274

 ********** Fold 2 **********

Loaded pretrained weights for efficientnet-b5
lr: 5e-05
Epoch 1/30 	 loss=1.3059 	 val_loss=0.9984 	 val_acc=0.7493 	 time=65.54s
model saved!
lr: 0.001
Epoch 2/30 	 loss=0.7262 	 val_loss=0.3976 	 val_acc=0.8926 	 time=65.29s
model saved!
lr: 0.0009713539948733066
Epoch 3/30 	 loss=0.6096 	 val_loss=0.3053 	 val_acc=0.9229 	 time=65.25s
model saved!
lr: 0.0008888711104815145
Epoch 4/30 	 loss=0.5929 	 val_loss=0.2516 	 val_acc=0.9339 	 time=65.74s
model saved!
lr: 0.0007625
Epoch 5/30 	 loss=0.5483 	 val_loss=0.2724 	 val_acc=0.9284 	 time=65.41s
lr: 0.000607482884391792
Epoch 6/30 	 loss=0.4759 	 val_loss=0.2403 	 val_acc=0.9366 	 time=64.89s
model saved!
lr: 0.00044251711560820814
Epoch 7/30 	 loss=0.5115 	 val_loss=0.2368 	 val_acc=0.9477 	 time=64.63s
model saved!
lr: 0.0002875000000000001
Epoch 8/30 	 loss=0.45

Epoch 29/30 	 loss=0.3712 	 val_loss=0.1569 	 val_acc=0.9614 	 time=64.41s
lr: 5e-05
Epoch 30/30 	 loss=0.3709 	 val_loss=0.1507 	 val_acc=0.9614 	 time=65.26s
best loss: 0.14566902492357336 best accuracy: 0.977961432506887

 ********** Fold 4 **********

Loaded pretrained weights for efficientnet-b5
lr: 5e-05
Epoch 1/30 	 loss=1.3115 	 val_loss=1.6796 	 val_acc=0.6860 	 time=65.90s
model saved!
lr: 0.001
Epoch 2/30 	 loss=0.6773 	 val_loss=0.3689 	 val_acc=0.8871 	 time=65.43s
model saved!
lr: 0.0009713539948733066
Epoch 3/30 	 loss=0.6012 	 val_loss=0.2419 	 val_acc=0.9229 	 time=66.12s
model saved!
lr: 0.0008888711104815145
Epoch 4/30 	 loss=0.5428 	 val_loss=0.2467 	 val_acc=0.9339 	 time=65.78s
model saved!
lr: 0.0007625
Epoch 5/30 	 loss=0.5260 	 val_loss=0.2413 	 val_acc=0.9339 	 time=65.26s
lr: 0.000607482884391792
Epoch 6/30 	 loss=0.5185 	 val_loss=0.2563 	 val_acc=0.9256 	 time=65.13s
lr: 0.00044251711560820814
Epoch 7/30 	 loss=0.5076 	 val_loss=0.1764 	 val_acc=0.9642 	 ti

39

In [18]:
shutil.copyfile('./pipeline-ls.ipynb', './exp/exp' + str(EXP) + '/pipeline.ipynb')

'./exp/exp3/pipeline.ipynb'

In [19]:
log.close()