# Imports

In [1]:
!pip install -q efficientnet_pytorch

You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [2]:
import os

from fastai.vision.all import *
from fastai.vision.core import *
from fastai.callback.fp16 import *

import pandas as pd
import numpy as np

from efficientnet_pytorch import EfficientNet
from sklearn.model_selection import train_test_split, StratifiedKFold

import albumentations

import wandb
from fastai.callback.wandb import *

# Setup

In [3]:
# Root folder of the competition data; the image folder and the label CSV
# are both resolved relative to it.
path_str = '../input/cassava-leaf-disease-classification'
data_root = Path(path_str)

images_path = data_root / 'train_images'
csv_path = data_root / 'train.csv'

# Label table; later cells read its `image_id` and `label` columns.
full_df = pd.read_csv(csv_path)

In [4]:
class Config:
    """Run-wide settings read by the data, augmentation and training cells."""

    # True: train on a small random subset and skip the W&B login.
    testing: bool = False
    # Side length (pixels) handed to the train/valid augmentation builders.
    image_size: int = 512
    # Batch size used when building the dataloaders.
    batch_size: int = 16
    # Number of cross-validation folds.
    folds: int = 3
    # Epochs passed to `fine_tune`.
    epochs: int = 10
    # `freeze_epochs` passed to `fine_tune`.
    f_epochs: int = 1
    # Name passed to `EfficientNet.from_pretrained`.
    arch: str = 'efficientnet-b4'


cfg = Config()

# Create a small debug subset (testing mode) or log in to W&B

In [5]:
if cfg.testing:
    # Debug mode: keep a random ~1% of the rows so a full pass finishes quickly.
    # Note that re-running this cell shrinks `full_df` again.
    msk = np.random.rand(len(full_df)) < 0.01
    full_df = full_df[msk]
else:
    # Credentials must never live in the notebook source: read the W&B key
    # from the environment instead (e.g. populated from a secrets store).
    # Any key that was previously committed here should be revoked.
    wandb_api_key = os.environ.get("WANDB_API_KEY")
    if not wandb_api_key:
        raise RuntimeError(
            "WANDB_API_KEY is not set; export it before running with cfg.testing = False."
        )
    wandb.login(key=wandb_api_key)
    wandb.init(project="cassava", entity="teo03")
len(full_df)

[34m[1mwandb[0m: W&B API key is configured (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mteo03[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.14 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


21397

# Augmentation and train functions

In [6]:
class AlbumentationsTransform(RandTransform):
    """Item transform that runs one of two albumentations pipelines on a `PILImage`.

    `train_aug` is used when the split index seen in `before_call` is 0 and
    `valid_aug` for any other value (presumably 0 is fastai's training split —
    confirm against the fastai docs).
    """
    # `split_idx = None` presumably asks fastai to call the transform on every
    # split; the choice between pipelines is made in `encodes` instead.
    split_idx, order = None, 2

    def __init__(self, train_aug, valid_aug):
        # Stores both pipelines as `self.train_aug` / `self.valid_aug`.
        store_attr()

    def before_call(self, b, split_idx):
        # Remember which split the current call belongs to.
        self.idx = split_idx

    def encodes(self, img: PILImage):
        # Pick the pipeline once, then apply it to the image as a numpy array.
        pipeline = self.train_aug if self.idx == 0 else self.valid_aug
        augmented = pipeline(image=np.array(img))
        return PILImage.create(augmented['image'])


def get_train_aug(size):
    """Build the albumentations pipeline applied to training images.

    Args:
        size: side length (pixels) of the square crop produced by
            `RandomResizedCrop`.

    Returns:
        An `albumentations.Compose` whose steps after the crop each fire
        with probability 0.5.
    """
    return albumentations.Compose([
        albumentations.RandomResizedCrop(size, size),
        albumentations.Transpose(p=0.5),
        albumentations.HorizontalFlip(p=0.5),
        albumentations.VerticalFlip(p=0.5),
        albumentations.ShiftScaleRotate(p=0.5),
        # NOTE(review): albumentations documents these limits in integer HSV
        # units (library defaults are 20/30/20), so 0.2 looks like a near
        # no-op — confirm the intended strength before changing it.
        albumentations.HueSaturationValue(
            hue_shift_limit=0.2,
            sat_shift_limit=0.2,
            val_shift_limit=0.2,
            p=0.5
        ),
        albumentations.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1),
            contrast_limit=(-0.1, 0.1),
            p=0.5
        ),
        albumentations.CoarseDropout(p=0.5),
        # Second, independent dropout pass. This used to be `Cutout`, which is
        # deprecated (and removed in later albumentations releases); with
        # default arguments CoarseDropout cuts the same 8 holes of 8x8 pixels.
        albumentations.CoarseDropout(p=0.5),
    ])

def get_valid_aug(size):
    """Build the deterministic albumentations pipeline for validation images.

    Args:
        size: side length (pixels) of the square output.

    Returns:
        An `albumentations.Compose` that center-crops to `size` x `size` and
        then resizes to `size` x `size`.
    """
    center_crop = albumentations.CenterCrop(height=size, width=size, p=1.)
    resize = albumentations.Resize(height=size, width=size)
    return albumentations.Compose([center_crop, resize], p=1.)

In [7]:
def train(dls, fold_i):
    """Fine-tune a pretrained EfficientNet on `dls` and export the learner.

    Args:
        dls: fastai dataloaders for one fold.
        fold_i: fold number; only used in the export filename
            `model-f{fold_i}.pkl`.

    Returns:
        The fitted `Learner`.
    """
    model = EfficientNet.from_pretrained(cfg.arch, num_classes=5)

    # WandbCallback refuses to be constructed when no W&B run is active, which
    # is the case in testing mode (wandb.init() is skipped there). Only attach
    # it when a run exists so the debug path can train as well.
    callbacks = []
    if wandb.run is not None:
        callbacks.append(WandbCallback(log_preds=False, log_model=False, n_preds=10))

    # define learner
    learn = Learner(
        dls=dls,
        model=model,
        opt_func=ranger,
        metrics=accuracy,
        loss_func=LabelSmoothingCrossEntropy(),
        cbs=callbacks,
    ).to_fp16()

    # Take the first suggestion instead of unpacking a fixed-size tuple: the
    # number of values returned by lr_find differs between fastai releases.
    suggested_lr = learn.lr_find(show_plot=False)[0]
    # Rounding a very small suggestion would give 0.0 and stall training;
    # fall back to the unrounded value in that case.
    base_lr = round(suggested_lr, 5) or suggested_lr
    print(f'found lr of({suggested_lr}): {base_lr}')

    # start model training
    learn.fine_tune(
        cfg.epochs,
        base_lr=base_lr,
        freeze_epochs=cfg.f_epochs
    )

    learn.export(Path(f'model-f{fold_i}.pkl'))

    return learn

In [8]:
def get_x(row):
    """Return the image path for a row: `images_path` joined with its `image_id`."""
    return images_path.joinpath(row['image_id'])


def get_y(row):
    """Return the row's `label` value."""
    return row['label']

# Training
Split the data into a training set and a hold-out test set, then train one model per fold using stratified k-fold cross-validation.

In [9]:
# Fixed seed so the same rows land in the hold-out set on every run; without
# it, models exported by one run could be scored on rows they were trained on.
HOLDOUT_SEED = 42

# Stratify on the label so the 10% hold-out keeps the class proportions of
# the full table, matching the stratified folds used for training.
train_df, test_df = train_test_split(
    full_df,
    test_size=0.1,
    stratify=full_df['label'],
    random_state=HOLDOUT_SEED,
)
len(train_df), len(test_df)

(19257, 2140)

In [10]:
# Fixed seed so fold membership is reproducible and each exported
# `model-f{i}.pkl` can later be matched to the rows it was validated on.
FOLD_SEED = 42
skf = StratifiedKFold(n_splits=cfg.folds, shuffle=True, random_state=FOLD_SEED)

# `split` yields positional indices into `train_df`; only the validation
# indices are needed because IndexSplitter treats every other row as training.
for i, (train_index, val_index) in enumerate(skf.split(train_df.index, train_df['label'])):

    print(f'training on fold: {i} \n' )

    train_block = DataBlock(
            blocks=(ImageBlock, CategoryBlock),
            get_x=get_x,
            get_y=get_y,
            splitter=IndexSplitter(val_index),
            item_tfms= [
                AlbumentationsTransform(
                    get_train_aug(size=cfg.image_size),
                    get_valid_aug(size=cfg.image_size)
                )
            ],
            batch_tfms=[Normalize.from_stats(*imagenet_stats)]
        )

    dls = train_block.dataloaders(train_df, bs=cfg.batch_size)
    # Trains on this fold and writes `model-f{i}.pkl`; `learn` keeps the
    # learner of the most recent fold.
    learn = train(dls, i)

    print('done \n')

print('training done \n')

training on fold: 0 



Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b4-6ed6700e.pth


HBox(children=(FloatProgress(value=0.0, max=77999237.0), HTML(value='')))


Loaded pretrained weights for efficientnet-b4


found lr of(0.0006309573538601399): 0.00063


epoch,train_loss,valid_loss,accuracy,time
0,0.772769,0.802121,0.798567,14:42


epoch,train_loss,valid_loss,accuracy,time
0,0.698022,0.645944,0.877551,14:36
1,0.686546,0.683674,0.863374,14:40
2,0.673845,0.657427,0.874124,14:40
3,0.666301,0.666814,0.873656,14:39
4,0.660521,0.643847,0.876305,14:39
5,0.63604,0.636592,0.884406,14:40
6,0.62386,0.635274,0.887833,14:41
7,0.590492,0.63362,0.885964,14:40
8,0.557987,0.631036,0.887366,14:42
9,0.595162,0.630954,0.887833,14:40


done 

training on fold: 1 

Loaded pretrained weights for efficientnet-b4


found lr of(0.001096478197723627): 0.0011


epoch,train_loss,valid_loss,accuracy,time
0,0.795359,0.702928,0.860882,14:42


epoch,train_loss,valid_loss,accuracy,time
0,0.690143,0.644047,0.876616,14:44
1,0.713191,0.656296,0.878953,14:43
2,0.713802,0.648703,0.875526,14:42
3,0.701608,0.64868,0.879888,14:50
4,0.6932,0.648264,0.883004,15:29
5,0.663546,0.637771,0.885496,15:55
6,0.629344,0.629314,0.886587,16:00
7,0.610421,0.621777,0.889858,15:43
8,0.577703,0.617495,0.894532,15:04
9,0.593584,0.618126,0.894688,14:49


done 

training on fold: 2 

Loaded pretrained weights for efficientnet-b4


found lr of(0.0019054606556892395): 0.00191


epoch,train_loss,valid_loss,accuracy,time
0,0.878635,0.96249,0.762424,14:49


epoch,train_loss,valid_loss,accuracy,time
0,0.718,0.663387,0.87132,14:50
1,0.733389,0.665876,0.873656,14:50
2,0.731055,0.692821,0.862284,14:52
3,0.713185,0.692696,0.858545,14:49
4,0.686808,0.675973,0.873189,15:12
5,0.672478,0.642075,0.880823,15:17
6,0.643822,0.642767,0.883938,15:26
7,0.627007,0.63718,0.881913,15:09
8,0.627621,0.621103,0.889079,15:25
9,0.608641,0.621857,0.890326,15:31


done 

training done 

