This is an example training script for image classification using TorchUtils.
The dataset and the task were introduced by Mu Li on [Kaggle](https://www.kaggle.com/c/classify-leaves).

## Prepare Lib

In [1]:
# First, you should install TorchUtils (see README.md)

In [2]:
import os
import cv2
import time
import math
import torch
import torch.nn as nn
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split, StratifiedKFold
from torch.optim import Adam, AdamW
from torch.nn.parameter import Parameter
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
from sklearn import metrics
import urllib
import pickle
import torch.nn.functional as F
import seaborn as sns
import random
import sys
import gc
import shutil
from tqdm.autonotebook import tqdm
import albumentations
from albumentations import pytorch as AT

import scipy.special
sigmoid = lambda x: scipy.special.expit(x)
from scipy.special import softmax

import torch_utils as tu 

import warnings
warnings.filterwarnings("ignore")

In [3]:
# Global seed: reused below for the StratifiedKFold split and the per-epoch NumPy reseeding.
SEED = 42
# Root directory that holds train.csv and the image paths listed inside it.
base_dir = '../input/'
# Seed the RNGs through the TorchUtils helper.
# NOTE(review): deterministic=False presumably trades strict reproducibility for speed —
# confirm against the TorchUtils documentation.
tu.tools.seed_everything(SEED, deterministic=False)
# NOTE(review): the GPU ids are hard-coded; adjust to the devices available on this machine.
tu.tools.set_gpus('0,1') # gpu ids

In [4]:
# Allocate the first unused experiment directory ../exp/exp<N>.
# The directory is created atomically and the id is bumped on FileExistsError, so two
# runs started at the same time can never end up sharing (or crashing on) the same id,
# which a separate "exists?" check followed by a create cannot guarantee.
EXP = 1
while True:
    try:
        os.makedirs('../exp/exp%d' % EXP)
        break
    except FileExistsError:
        EXP += 1

## Param

In [5]:
# Number of target classes: passed to the model and to Mixup, and used as the width
# of the validation prediction matrix.
CLASSES = 176
# Number of stratified cross-validation folds.
FOLD = 5
# Batch size of both the training and the validation DataLoader.
BATCH_SIZE = 64
# Gradient accumulation: the optimizer and LR scheduler step once every ACCUMULATE batches.
ACCUMULATE = 1
# Initial learning rate handed to the optimizer.
LR = 3e-4
# Number of training epochs per fold.
EPOCH = 36
# The cosine schedule anneals the learning rate down to LR / DECAY_SCALE.
DECAY_SCALE = 20.0
# Mixup probability; a falsy value skips Mixup and selects the label-smoothing loss instead.
MIXUP = 0 # 0 to 1

## Dataset

In [6]:
def _model_input_ops():
    """Return fresh Normalize -> ToTensorV2 steps that close both pipelines."""
    return [albumentations.Normalize(), AT.ToTensorV2()]


# Training pipeline: random geometric augmentations, then the TorchUtils
# randAugment transform, then normalisation and tensor conversion.
_train_augmentations = [
    albumentations.RandomRotate90(p=0.5),
    albumentations.Transpose(p=0.5),
    albumentations.Flip(p=0.5),
    albumentations.ShiftScaleRotate(
        shift_limit=0.0625,
        scale_limit=0.0625,
        rotate_limit=45,
        border_mode=1,
        p=0.5,
    ),
    tu.randAugment(),
]
train_transform = albumentations.Compose(_train_augmentations + _model_input_ops())

# Evaluation pipeline: no augmentation, only normalisation and tensor conversion.
test_transform = albumentations.Compose(_model_input_ops())


class LeavesDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs for the leaves classification task.

    Args:
        df: DataFrame with an ``image`` column (path relative to ``data_path``)
            and a ``label`` column (class name).
        label_encoder: Mapping from class name to integer class index. Every
            label in ``df`` must be a key, otherwise ``KeyError`` is raised.
        data_path: Directory that the ``image`` paths are relative to.
        transform: Callable invoked as ``transform(image=img)`` whose result
            exposes the transformed image under the ``'image'`` key.
    """

    def __init__(self, df, label_encoder, data_path='../input', transform = train_transform):
        # Encode labels on a private copy: the caller's frame stays untouched, so the
        # same DataFrame can safely be wrapped by more than one dataset.
        self.df = df.copy()
        self.data_path = data_path
        self.transform = transform
        self.df['label'] = self.df['label'].apply(lambda name: label_encoder[name])

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        # Positional access keeps this correct even when the frame's index is not 0..n-1.
        img_path = os.path.join(self.data_path, self.df['image'].iloc[idx])
        label = self.df['label'].iloc[idx]

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('image is missing or unreadable: %s' % img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads images as BGR
        img = self.transform(image=img)['image']
        return img, label

In [7]:
# Load the training index (one row per image with its class name).
train_csv_path = os.path.join(base_dir, 'train.csv')
train_df = pd.read_csv(train_csv_path)

In [8]:
# Preview the first rows to check the `image` / `label` columns used by the dataset class.
train_df.head()

Unnamed: 0,image,label
0,images/0.jpg,maclura_pomifera
1,images/1.jpg,maclura_pomifera
2,images/2.jpg,maclura_pomifera
3,images/3.jpg,maclura_pomifera
4,images/4.jpg,maclura_pomifera


In [9]:
# Build the stratified train/validation index arrays once, so that every fold of the
# cross-validation loop reuses exactly the same partition.
sfolder = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=SEED)
fold_splits = list(sfolder.split(train_df.image, train_df.label))

tr_folds = [fold_train for fold_train, _ in fold_splits]
val_folds = [fold_val for _, fold_val in fold_splits]

# Report the size of each partition as a quick sanity check.
for train_idx, val_idx in fold_splits:
    print(len(train_idx), len(val_idx))

14682 3671
14682 3671
14682 3671
14683 3670
14683 3670


## Training Loop

In [10]:
from torch.optim.lr_scheduler import CosineAnnealingLR
# Single gradient scaler for mixed-precision training; it is created once here and
# reused by train_model() across all epochs and folds.
scaler = torch.cuda.amp.GradScaler() # for AMP training

In [11]:
def train_model(epoch, verbose=False):
    """Run one training epoch with mixed precision and gradient accumulation.

    Operates on the notebook-level ``model_conv``, ``optimizer``, ``scheduler``,
    ``criterion``, ``scaler`` and ``train_loader``.

    Args:
        epoch: Index of the current epoch. Accepted for the caller's convenience;
            it is not used inside the function.
        verbose: When true, show a tqdm progress bar over the batches.

    Returns:
        float: Mean of the per-batch training losses over the epoch.
    """
    model_conv.train()
    avg_loss = 0.
    optimizer.zero_grad()
    num_batches = len(train_loader)
    mixup_fn = tu.Mixup(prob=MIXUP, switch_prob=0.0, onehot=True, label_smoothing=0.05, num_classes=CLASSES)
    bar = tqdm(total=num_batches) if verbose else None
    try:
        for idx, (imgs, labels) in enumerate(train_loader):
            imgs_train, labels_train = imgs.float().cuda(), labels.cuda()
            if MIXUP:
                imgs_train, labels_train = mixup_fn(imgs_train, labels_train)
            with torch.cuda.amp.autocast():
                output_train, _ = model_conv(imgs_train)
                loss = criterion(output_train, labels_train)
            # Gradients of ACCUMULATE consecutive batches are summed before a step, so
            # each batch contributes 1/ACCUMULATE of its loss; this keeps the gradient
            # magnitude independent of the accumulation factor.
            scaler.scale(loss / ACCUMULATE).backward()
            if (idx + 1) % ACCUMULATE == 0:
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()
                scheduler.step()  # the schedule advances once per optimizer step
            # Report the unscaled loss so the value is comparable across ACCUMULATE settings.
            avg_loss += loss.item() / num_batches
            if bar is not None:
                bar.update(1)
    finally:
        # Close the bar even when a batch raises, so no stale widget is left behind.
        if bar is not None:
            bar.close()
    return avg_loss

def test_model():
    """Evaluate ``model_conv`` on ``val_loader``.

    Operates on the notebook-level ``model_conv``, ``criterion_test``, ``valset``
    and ``val_loader``.

    Returns:
        tuple: ``(avg_val_loss, metric_val)`` where ``avg_val_loss`` is the mean of
        the per-batch ``criterion_test`` losses and ``metric_val`` is the fraction
        of validation samples whose arg-max prediction equals the true label.
    """
    avg_val_loss = 0.
    model_conv.eval()
    y_true_val = np.zeros(len(valset))
    y_pred_val = np.zeros((len(valset), CLASSES))
    num_batches = len(val_loader)
    offset = 0  # write position in the output arrays, advanced by the real batch size
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs_valid, labels_valid = imgs.float().cuda(), labels.cuda()
            output_test, _ = model_conv(imgs_valid)
            avg_val_loss += criterion_test(output_test, labels_valid).item() / num_batches
            # np.int was removed in NumPy 1.24; np.int64 is the explicit equivalent here.
            batch_true = labels_valid.detach().cpu().numpy().astype(np.int64)
            batch_prob = softmax(output_test.detach().cpu().numpy(), axis=1)

            batch_len = batch_prob.shape[0]
            y_true_val[offset:offset + batch_len] = batch_true
            y_pred_val[offset:offset + batch_len] = batch_prob
            offset += batch_len

    metric_val = np.mean(np.argmax(y_pred_val, axis=1) == y_true_val)
    return avg_val_loss, metric_val

In [12]:
def train(fold):
    """Train the current fold for ``EPOCH`` epochs and checkpoint the best model.

    Operates on the notebook-level ``model_conv`` and ``optimizer`` and calls
    ``train_model`` / ``test_model`` once per epoch. Whenever the validation
    metric improves, the weights are saved to
    ``../exp/exp<EXP>/model-best<fold>.pth``.

    Args:
        fold: Identifier of the fold, used only in the checkpoint file name.

    Returns:
        tuple: ``(best_avg_loss, best_acc)`` - the lowest validation loss and the
        highest validation metric seen over the epochs (not necessarily reached
        in the same epoch).
    """
    best_avg_loss = 100.0
    best_acc = 0.0

    # Baseline evaluation before any training step.
    avg_val_loss, avg_val_acc = test_model()
    print('pretrain val loss %.4f precision %.4f'%(avg_val_loss, avg_val_acc))

    ### training
    for epoch in range(EPOCH):
        print('lr:', optimizer.param_groups[0]['lr'])
        # Reseed NumPy from the epoch index (not the EPOCH constant) so every epoch
        # gets its own, yet reproducible, random stream.
        np.random.seed(SEED + epoch * 999)
        start_time = time.time()
        avg_loss = train_model(epoch)
        avg_val_loss, avg_val_acc = test_model()
        elapsed_time = time.time() - start_time
        print('Epoch {}/{} \t train_loss={:.4f} \t val_loss={:.4f} \t val_precision={:.4f} \t time={:.2f}s'.format(
            epoch + 1, EPOCH, avg_loss, avg_val_loss, avg_val_acc, elapsed_time))

        best_avg_loss = min(best_avg_loss, avg_val_loss)

        if avg_val_acc > best_acc:
            best_acc = avg_val_acc
            # model_conv is wrapped in DataParallel by the caller; saving .module keeps
            # the checkpoint keys free of the wrapper's prefix.
            torch.save(model_conv.module.state_dict(), '../exp/exp' + str(EXP) + '/model-best' + str(fold) + '.pth')
            print('model saved!')

        print('=================================')

    print('best loss:', best_avg_loss)
    print('best precision:', best_acc)
    return best_avg_loss, best_acc

In [None]:
# Cross-validation driver: trains one model per fold and records the results in
# ../exp/exp<EXP>/log.txt. The log handle stays open for the rest of the notebook;
# every write is flushed so an interrupted run still leaves the completed folds on disk.
log = open('../exp/exp' + str(EXP) +'/log.txt', 'w')
log.write('SEED%d\n'%SEED)
log.flush()
cv_losses = []
cv_metrics = []

# The class-name -> index mapping does not depend on the fold, so build it once.
labels = train_df.label.unique()
label_encoder = {name: idx for idx, name in enumerate(labels)}

for fold in range(FOLD):
    print('\n ********** Fold %d **********\n'%fold)
    ###################### Dataset #######################
    trainset = LeavesDataset(train_df.iloc[tr_folds[fold]].reset_index(), label_encoder, base_dir, train_transform)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, num_workers=16, shuffle=True, drop_last=True, worker_init_fn=tu.tools.worker_init_fn)

    valset = LeavesDataset(train_df.iloc[val_folds[fold]].reset_index(), label_encoder, base_dir, test_transform)
    val_loader = torch.utils.data.DataLoader(valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

    ####################### Model ########################
    model_conv = tu.ImageModel(name='resnest50d', pretrained=True, num_feature=2048, classes=CLASSES)
    model_conv.cuda()
    model_conv = torch.nn.DataParallel(model_conv)

    ###################### Optim ########################
    optimizer = tu.RangerLars(model_conv.parameters(), lr=LR, weight_decay=2e-4)

    # Mixup produces soft targets, which need the matching soft-target loss.
    if MIXUP:
        criterion = tu.SoftTargetCrossEntropy()
    else:
        criterion = tu.LabelSmoothingCrossEntropy()

    criterion_test = nn.CrossEntropyLoss()

    # Total number of optimizer steps over all epochs = length of the cosine cycle.
    T = len(train_loader)//ACCUMULATE * EPOCH # cycle
    scheduler = CosineAnnealingLR(optimizer, T_max=T, eta_min=LR/DECAY_SCALE)

    val_loss, val_acc = train(fold)

    cv_losses.append(val_loss)
    cv_metrics.append(val_acc)
    # Persist the fold result immediately instead of only the final average.
    log.write('fold%d loss:%.6f  precision:%.6f\n'%(fold, val_loss, val_acc))
    log.flush()
    torch.cuda.empty_cache()

cv_loss = sum(cv_losses) / FOLD
cv_acc = sum(cv_metrics) / FOLD
print('CV loss:%.6f  CV precision:%.6f'%(cv_loss, cv_acc))
log.write('CV loss:%.6f  CV precision:%.6f\n\n'%(cv_loss, cv_acc))
log.flush()


 ********** Fold 0 **********

pretrain val loss 9.4469 precision 0.0057
lr: 0.0003


In [None]:
# Close the experiment log opened by the cross-validation cell.
log.close()
# Copy the current directory into the experiment folder through the TorchUtils helper,
# so the code that produced this run is stored next to its results.
tu.tools.backup_folder('.', '../exp/exp%d/src'%EXP)  # backup code