In [None]:
!pip install --upgrade efficientnet-pytorch

In [None]:
!pip install --upgrade albumentations

In [None]:
# Mount Google Drive; the dataset archive is read from it by the unzip cell.
# NOTE(review): the mount point is relative. The unzip cell reads from
# /content/content/MyDrive/..., so this presumably relies on the working
# directory being /content — confirm before changing either path.
from google.colab import drive
drive.mount('content/')

Mounted at content/


In [None]:
%%time
!mkdir cassava_leaf
!unzip /content/content/MyDrive/Cassava_Leaf_Classification/cassava-leaf-disease-classification.zip -d cassava_leaf

In [1]:
import numpy as np
import pandas as pd
import os


import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional
import torchvision
from torch.utils.data import Dataset,DataLoader
from torchvision import datasets, transforms, models
from PIL import Image
import cv2 as cv
from efficientnet_pytorch import EfficientNet
from os import listdir
from os.path import isfile, join
import time
from tqdm import tqdm
import copy
from sklearn.model_selection import StratifiedShuffleSplit, train_test_split, StratifiedKFold
from sklearn.metrics import f1_score as f1
from sklearn.metrics import accuracy_score as accuracy
import json
import random

from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout, ShiftScaleRotate, CenterCrop, Resize
    )
from albumentations.pytorch import ToTensorV2

from utils import *
from cutmix import *
from losses import *

In [2]:
# seed_everything comes from the star-imported helper modules; presumably it
# seeds the RNGs used by the notebook — confirm in utils.
seed_everything(7)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
device  # last expression: shown as the cell output

device(type='cuda', index=0)

In [3]:
# Root of the extracted competition data; every other location is derived from it.
path = '/content/cassava_leaf/'

# Image directories (trailing slash kept so file names can be appended directly).
train_path = os.path.join(path, 'train_images/')
test_path = os.path.join(path, 'test_images/')

# Label table for the training images and the submission template.
train_data = pd.read_csv(os.path.join(path, 'train.csv'))
sub = pd.read_csv(os.path.join(path, 'sample_submission.csv'))

In [4]:
class CFG:
    """Experiment configuration shared by the cells below."""

    # --- bookkeeping ---
    version = 11                      # experiment tag; not referenced in the visible cells
    seed = 7                          # random_state of the StratifiedKFold splitter

    # --- model / input ---
    model_name = 'efficientnet-b4'    # passed to EfficientNet.from_pretrained
    img_size = 448                    # square side length given to Resize

    # --- cross-validation / optimisation ---
    N_FOLDS = 4                       # number of stratified folds
    epochs = 1                        # 10 in original
    batch_size = 16                   # DataLoader batch size
    n_workers = 0                     # DataLoader num_workers
    LR = 1e-4                         # Adam learning rate


# Master switch for the training cell; switch to True for training.
TRAIN = False

In [5]:
# Albumentations pipelines keyed by split name; the 'train' and 'val' entries are
# handed to Leaf_Dataset in the training cell.
#
# NOTE(review): the normalisation statistics differ from the usual ImageNet
# values (mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]); presumably
# they were computed on the training images — confirm.
transform_train = {
    'train': Compose([
        Resize(CFG.img_size, CFG.img_size),
        Transpose(p=0.5),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        ShiftScaleRotate(p=0.5),
        Normalize(mean=[0.4580, 0.5274, 0.3245], std=[0.2267, 0.2285, 0.2170]),
        # Random occlusion with 40x40 holes. The previous
        # Cutout(num_holes=np.random.randint(20), ...) drew the hole count ONCE,
        # when this dict was built, so every image received the same number of
        # holes (possibly zero). CoarseDropout samples the count per call from
        # [min_holes, max_holes]; min_holes must be at least 1, and the
        # "no holes" case is already covered by p=0.5.
        CoarseDropout(
            min_holes=1, max_holes=19,
            min_height=40, max_height=40,
            min_width=40, max_width=40,
            p=0.5,
        ),
        ToTensorV2(p=1.0),
    ], p=1.),
    # NOTE(review): validation still applies random flips and shift/scale/rotate,
    # so validation metrics are not deterministic — confirm this is intentional.
    'val': Compose([
        Resize(CFG.img_size, CFG.img_size),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        ShiftScaleRotate(p=0.5),
        Normalize(mean=[0.4580, 0.5274, 0.3245], std=[0.2267, 0.2285, 0.2170]),
        ToTensorV2(p=1.0),
    ], p=1.),
}

In [6]:
# Stratified K-fold splitter; shuffling uses the seed from CFG.
folds = StratifiedKFold(
    n_splits=CFG.N_FOLDS,
    shuffle=True,
    random_state=CFG.seed,
)

# X: every column except the last; y: the last column, kept as a one-column frame.
X, y = train_data.iloc[:, :-1], train_data.iloc[:, -1:]

In [7]:
if TRAIN:
    # One independent run per stratified fold: a fresh pretrained model,
    # optimizer, loss and scheduler are created every iteration.
    for fold, (train_idx, val_idx) in enumerate(folds.split(X, y)):
        print("Fold {}/{}".format(fold + 1, CFG.N_FOLDS))

        # Pretrained backbone with a 5-class head, moved to the target device once.
        model = EfficientNet.from_pretrained(CFG.model_name, num_classes=5)
        model.to(device)

        # reset_index returns new frames; the former inplace=True variant mutated
        # slices of train_data, which pandas may flag with SettingWithCopyWarning.
        train = train_data.iloc[train_idx].reset_index(drop=True)
        valid = train_data.iloc[val_idx].reset_index(drop=True)

        image_datasets = {
            'train': Leaf_Dataset(train, transforms=transform_train['train'], split_type='train',
                                  train_path=train_path, test_path=test_path),
            'val': Leaf_Dataset(valid, transforms=transform_train['val'], split_type='val',
                                train_path=train_path, test_path=test_path),
        }

        # Only the training loader is shuffled; validation order does not need to change.
        dataloaders = {
            split: torch.utils.data.DataLoader(
                image_datasets[split],
                batch_size=CFG.batch_size,
                shuffle=(split == 'train'),
                num_workers=CFG.n_workers,
            )
            for split in ['train', 'val']
        }
        dataset_sizes = {split: len(image_datasets[split]) for split in ['train', 'val']}

        optimizer = torch.optim.Adam(model.parameters(), lr=CFG.LR)
        criterion = SymmetricCrossEntropy()
        # NOTE(review): T_0=25 is larger than CFG.epochs (1, or 10 in the original);
        # if train_val steps the scheduler once per epoch, no warm restart ever
        # happens — how train_val steps it is not visible here.
        # NOTE(review): `verbose` is deprecated in recent PyTorch releases — confirm
        # the installed version still accepts it.
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=25, T_mult=1, eta_min=0.000001, last_epoch=-1, verbose=True
        )

        f1_score, fold_loss, fold_acc = train_val(
            model, CFG.epochs, fold, dataloaders, criterion, optimizer, scheduler, device
        )

        print("Fold №{} f1_score {}".format(fold+1, f1_score))
        print("Fold №{} loss {}".format(fold+1, fold_loss))
        print("Fold №{} ACC {}".format(fold+1, fold_acc))
        print()