In [1]:
## Importing necessary libraries.

!pip install timm
from glob import glob
import cv2
import torch
from torch import nn
import os
import time
import random
import cv2
import torchvision
import pandas as pd
import numpy as np
from tqdm import tqdm

import matplotlib.pyplot as plt
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.cuda.amp import autocast, GradScaler
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

import timm

import sklearn
import joblib
from sklearn import metrics
import cv2

The first function is to seed every random phenomenon, so that our experiments and scores are reproducible.<br>
The second function takes an image path and reads the image. Because cv2 reads images in BGR channel order, we convert the image from BGR to RGB before returning it.<br>
The last function is to plot images, for the EDA.

In [21]:
'''
Helper functions
'''

def seed_everything(seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED``, and configures cuDNN for
    repeatable results.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. subprocesses);
    # the hash seed of the running interpreter is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may select different kernels from run to run, so
    # it has to be switched off for reproducible results.
    torch.backends.cudnn.benchmark = False
    
def get_img(path):
    """Read an image from disk and return it in RGB channel order.

    Args:
        path: Location of the image file.

    Returns:
        The decoded image converted from OpenCV's BGR order to RGB.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``path``.
    """
    im_bgr = cv2.imread(path)
    # cv2.imread reports failure by returning None instead of raising; fail
    # here with the offending path rather than inside cvtColor.
    if im_bgr is None:
        raise FileNotFoundError(f'Could not read image: {path}')
    return cv2.cvtColor(im_bgr, cv2.COLOR_BGR2RGB)

def plot_images(df, data_root='../input/oxford102class/oxford-102-flowers/'):
    """Show up to nine randomly sampled images in a 3x3 grid, titled by label.

    Args:
        df: DataFrame with an ``image_id`` column (joined onto ``data_root``
            to locate the file) and a ``label`` column.
        data_root: Directory the ``image_id`` values are relative to.
    """
    plt.figure(figsize=(10, 10))
    # Never request more rows than the frame holds; sample() would raise.
    sampled = df.sample(min(9, len(df)))
    target_size = (512, 512)

    # Pair each image with its own label directly instead of indexing two
    # parallel lists with an offset.
    pairs = zip(sampled['image_id'], sampled['label'])
    for position, (image_name, label) in enumerate(pairs, start=1):
        axis = plt.subplot(3, 3, position)
        img = get_img(os.path.join(data_root, image_name))
        img = cv2.resize(img, target_size)
        axis.get_xaxis().set_visible(False)
        axis.get_yaxis().set_visible(False)
        axis.imshow(img)
        axis.set_title(label)

No specific instructions were given, so this notebook assumes that the train and valid splits are to be used for training and validating the model, and that the test split is used to measure the model's final performance.

In [2]:
#Reading in the 3 csv files:- train, valid, test

Base_Dir = '../input/oxford102class/oxford-102-flowers/'
column_names = ["image_id", "label"]

# Every split file is read as headerless, space-separated text.
train, test, valid = (
    pd.read_csv(os.path.join(Base_Dir, split_file), sep=' ', header=None)
    for split_file in ('train.txt', 'test.txt', 'valid.txt')
)
# Give the two unnamed columns meaningful names on each split.
for split_frame in (train, test, valid):
    split_frame.columns = column_names

# Exploratory Data Analysis

In [3]:
# Report how many rows (images) each split contains.
for split_name, split_frame in (('train', train), ('valid', valid), ('test', test)):
    print(f'No of {split_name} images: {len(split_frame)}')

Visualizing Images

In [22]:
# Show a random sample of training images with their labels.
plot_images(train)

In [23]:
# Show a random sample of validation images with their labels.
plot_images(valid)

In [24]:
# Show a random sample of test images with their labels.
plot_images(test)

# Model Training

Creating the config dict.<br>
The batch size used here is 64; with larger batch sizes, PyTorch ran out of CUDA memory.

In [14]:
# Central configuration read by the transforms, dataloaders, training loop and
# inference cells.
CFG = {

    'seed': 719,  # passed to seed_everything before training and inference
    'model_arch': 'tf_efficientnet_b4_ns',  # architecture name handed to timm.create_model
    'img_size': 256,  # height and width produced by the crop/resize transforms
    'epochs': 80,
    'train_bs': 64,
    'valid_bs': 64,  # also used as the batch size of the test loader
    'T_0': 10,  # T_0 argument of CosineAnnealingWarmRestarts
    'lr': 1e-4,
    'min_lr': 1e-6,  # eta_min argument of CosineAnnealingWarmRestarts
    'weight_decay':1e-6,
    'num_workers': 4,
    'accum_iter': 2, # support batch accumulation for backprop with an effectively larger batch size
    'verbose_step': 1,  # progress-bar description is refreshed every this many steps
    'device': 'cuda:0'
#     'device': 'cpu'
}

Now we create the Dataset.<br>
This class returns an image (and, optionally, its label) for a given index. We will pass these images to the dataloader.

In [16]:
#Creating the Image Dataset.

class FlowerDataset(Dataset):
    """Dataset that loads the images listed in a DataFrame.

    Args:
        df: DataFrame with an ``image_id`` column and, when ``output_label``
            is true, a ``label`` column. A re-indexed copy is stored.
        data_root: Prefix joined to each ``image_id`` with a "/" to form the
            path handed to ``get_img``.
        transforms: Optional callable invoked as ``transforms(image=img)``;
            its result is indexed with ``'image'``.
        output_label: When true, items are ``(image, label)`` pairs;
            otherwise only the image is returned.
    """

    def __init__(self, df, data_root,
                 transforms=None,
                 output_label=True,
                ):

        super().__init__()
        self.df = df.reset_index(drop=True).copy()
        self.transforms = transforms
        self.data_root = data_root

        self.output_label = output_label

        # Cache the columns as arrays once so that __getitem__ does not build
        # a pandas row object for every sample.
        self.image_ids = self.df['image_id'].values
        if output_label:
            self.labels = self.df['label'].values

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return self.df.shape[0]

    def __getitem__(self, index: int):
        """Load the image at ``index``, apply the transforms, and return it.

        Returns ``(image, label)`` when ``output_label`` is true, otherwise
        just the image.
        """
        img = get_img("{}/{}".format(self.data_root, self.image_ids[index]))

        if self.transforms:
            img = self.transforms(image=img)['image']

        # Same truthiness test as in __init__, so labels are returned exactly
        # when they were cached.
        if self.output_label:
            return img, self.labels[index]
        return img

I am using the Albumentations library for image augmentation. Here I am using a standard set of augmentations that I saw used in a flower classification challenge. I did not experiment much with this due to limited time.

In [17]:
#Image Augmentations 
from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout, ShiftScaleRotate, CenterCrop, Resize
)

from albumentations.pytorch import ToTensorV2

def get_train_transforms():
    """Build the augmentation pipeline applied to training images.

    Returns:
        A ``Compose`` that takes a random resized crop of
        ``CFG['img_size']`` pixels per side, applies random geometric and
        colour augmentations, normalizes, applies the two dropout-style
        transforms and finally converts the image to a tensor.
    """
    side = CFG['img_size']

    geometric = [
        RandomResizedCrop(side, side),
        Transpose(p=0.5),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        ShiftScaleRotate(p=0.5),
    ]
    # NOTE(review): shift limits of 0.2 look very small — confirm they are intended.
    colour = [
        HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    ]
    finishing = [
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        CoarseDropout(p=0.5),
        Cutout(p=0.5),
        ToTensorV2(p=1.0),
    ]
    return Compose(geometric + colour + finishing, p=1.)
  
        
def get_valid_transforms():
    """Build the deterministic pipeline used for validation and test images.

    Returns:
        A ``Compose`` that center-crops and resizes to ``CFG['img_size']``
        pixels per side, normalizes, and converts the image to a tensor.
    """
    side = CFG['img_size']
    # NOTE(review): the resize follows a crop of the same size, so it looks
    # redundant — confirm the intended order.
    steps = [
        CenterCrop(side, side, p=1.),
        Resize(side, side),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1.)

Now we create the model. Here I am using a pretrained EfficientNet-B4 model with ImageNet weights. I experimented with a few architectures, from ResNet-18 to EfficientNet-B6. EfficientNet-B4 gave me the best accuracy: the smaller models were too simple, while EfficientNet-B5 and B6 started to overfit.<br>


In [18]:
class FlowerImgClassifier(nn.Module):
    """timm backbone whose classifier layer is replaced for ``n_class`` outputs.

    Args:
        model_arch: Architecture name passed to ``timm.create_model``.
        n_class: Number of output units of the new linear classifier.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_arch, n_class, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_arch, pretrained=pretrained)
        # Swap the stock head for one sized to this task, keeping its input width.
        backbone.classifier = nn.Linear(backbone.classifier.in_features, n_class)
        self.model = backbone

    def forward(self, x):
        """Return the wrapped model's output for the batch ``x``."""
        return self.model(x)

Now we create the dataloader. You usually provide the dataset to the model in batches for training. This is done for many reasons, one being memory concerns.<br>
We also create the training and validation loops.
- We are using autocast from torch.cuda.amp for mixed precision training, which gives us a significant boost.

In [19]:
def prepare_dataloader(train_df, valid_df, data_root='../input/oxford102class/oxford-102-flowers/jpg/'):
    """Create the training and validation data loaders.

    Args:
        train_df: DataFrame describing the training images.
        valid_df: DataFrame describing the validation images.
        data_root: Directory prefix passed on to ``FlowerDataset``.

    Returns:
        A ``(train_loader, val_loader)`` tuple. The training loader shuffles
        and uses the training augmentations; the validation loader keeps the
        original order and uses the validation transforms.
    """
    # The unused catalyst sampler import was dropped so this function no
    # longer requires catalyst to be installed.
    train_ds = FlowerDataset(train_df, data_root, transforms=get_train_transforms(), output_label=True)
    valid_ds = FlowerDataset(valid_df, data_root, transforms=get_valid_transforms(), output_label=True)

    train_loader = torch.utils.data.DataLoader(
        train_ds,
        batch_size=CFG['train_bs'],
        pin_memory=False,
        drop_last=False,
        shuffle=True,
        num_workers=CFG['num_workers'],
    )
    val_loader = torch.utils.data.DataLoader(
        valid_ds,
        batch_size=CFG['valid_bs'],
        num_workers=CFG['num_workers'],
        shuffle=False,
        pin_memory=False,
    )
    return train_loader, val_loader

def train_one_epoch(epoch, model, loss_fn, optimizer, train_loader, device, scheduler=None, schd_batch_update=False):
    """Train ``model`` for one pass over ``train_loader`` with mixed precision.

    Gradients are accumulated over ``CFG['accum_iter']`` batches (and on the
    final batch) before each optimizer step. The function reads the
    notebook-level ``CFG`` dict and ``scaler`` (a ``GradScaler``), which must
    exist before it is called.

    Args:
        epoch: Epoch number, used only in the progress-bar description.
        model: Network to train.
        loss_fn: Callable taking ``(predictions, labels)``.
        optimizer: Optimizer stepped through ``scaler``.
        train_loader: Iterable of ``(images, labels)`` batches.
        device: Device the batches are moved to.
        scheduler: Optional learning-rate scheduler.
        schd_batch_update: When true the scheduler is stepped after every
            optimizer step; otherwise once at the end of the epoch.
    """
    model.train()

    accum_iter = CFG['accum_iter']
    num_batches = len(train_loader)
    running_loss = None

    pbar = tqdm(enumerate(train_loader), total=num_batches)
    for step, (imgs, image_labels) in pbar:
        imgs = imgs.to(device).float()
        image_labels = image_labels.to(device).long()
        is_last_batch = (step + 1) == num_batches

        # Only the forward pass and the loss run under autocast; backward and
        # the optimizer step are kept outside the context.
        with autocast():
            image_preds = model(imgs)
            loss = loss_fn(image_preds, image_labels)

        # Average rather than sum the gradients of the accumulated batches so
        # the update corresponds to one larger batch.
        scaler.scale(loss / accum_iter).backward()

        # Exponential moving average of the un-normalized batch loss, for display.
        batch_loss = loss.item()
        if running_loss is None:
            running_loss = batch_loss
        else:
            running_loss = running_loss * .99 + batch_loss * .01

        if ((step + 1) % accum_iter == 0) or is_last_batch:
            # may unscale_ here if desired (e.g., to allow clipping unscaled gradients)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

            if scheduler is not None and schd_batch_update:
                scheduler.step()

        if ((step + 1) % CFG['verbose_step'] == 0) or is_last_batch:
            pbar.set_description(f'epoch {epoch} loss: {running_loss:.4f}')

    if scheduler is not None and not schd_batch_update:
        scheduler.step()
        
def valid_one_epoch(epoch, model, loss_fn, val_loader, device, scheduler=None, schd_loss_update=False):
    """Evaluate ``model`` on ``val_loader`` and report loss and accuracy.

    Reads the notebook-level ``CFG`` dict for the progress-bar refresh rate.

    Args:
        epoch: Epoch number, used only in the progress-bar description.
        model: Network to evaluate; switched to eval mode.
        loss_fn: Callable taking ``(predictions, labels)``.
        val_loader: Iterable of ``(images, labels)`` batches.
        device: Device the batches are moved to.
        scheduler: Optional scheduler stepped once after evaluation.
        schd_loss_update: When true the scheduler is stepped with the mean
            validation loss; otherwise it is stepped without arguments.

    Returns:
        The fraction of samples whose arg-max prediction equals the label.
    """
    model.eval()

    loss_sum = 0
    sample_num = 0
    image_preds_all = []
    image_targets_all = []

    pbar = tqdm(enumerate(val_loader), total=len(val_loader))
    # Disable autograd here as well, so the function does not depend on the
    # caller wrapping it in no_grad.
    with torch.no_grad():
        for step, (imgs, image_labels) in pbar:
            imgs = imgs.to(device).float()
            image_labels = image_labels.to(device).long()

            image_preds = model(imgs)
            image_preds_all += [torch.argmax(image_preds, 1).detach().cpu().numpy()]
            image_targets_all += [image_labels.detach().cpu().numpy()]

            loss = loss_fn(image_preds, image_labels)

            # Weight each batch loss by its size so the running value is a
            # per-sample mean even when the last batch is smaller.
            loss_sum += loss.item()*image_labels.shape[0]
            sample_num += image_labels.shape[0]

            if ((step + 1) % CFG['verbose_step'] == 0) or ((step + 1) == len(val_loader)):
                description = f'epoch {epoch} loss: {loss_sum/sample_num:.4f}'
                pbar.set_description(description)

    image_preds_all = np.concatenate(image_preds_all)
    image_targets_all = np.concatenate(image_targets_all)
    accuracy = (image_preds_all==image_targets_all).mean()
    print('validation multi-class accuracy = {:.4f}'.format(accuracy))

    if scheduler is not None:
        if schd_loss_update:
            scheduler.step(loss_sum/sample_num)
        else:
            scheduler.step()

    # Returned so callers can track the best epoch programmatically.
    return accuracy

This is the training loop. We create the train and valid dataloader, the model, optimizer, scheduler etc. Then we start the training process. After every epoch of training we store the model state dict.

In [20]:
# Training cell: builds the loaders, model, optimizer and scheduler, then runs
# CFG['epochs'] rounds of training + validation, saving a checkpoint each epoch.
if __name__ == '__main__':
     # for training only, need nightly build pytorch

    seed_everything(CFG['seed'])
    
    # data_root is overridden here, so the function's default root is not used.
    train_loader, val_loader = prepare_dataloader(train, valid, data_root='../input/oxford102class/oxford-102-flowers/')

    device = torch.device(CFG['device'])
        
    # The number of output classes is the number of distinct training labels.
    model = FlowerImgClassifier(CFG['model_arch'], train.label.nunique(), pretrained=True).to(device)
    # Must be a notebook-level name: train_one_epoch reads `scaler` as a global.
    scaler = GradScaler()   
    optimizer = torch.optim.Adam(model.parameters(), lr=CFG['lr'], weight_decay=CFG['weight_decay'])
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=0.1, step_size=CFG['epochs']-1)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=CFG['T_0'], T_mult=1, eta_min=CFG['min_lr'], last_epoch=-1)
    #scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=25, 
    #                                                max_lr=CFG['lr'], epochs=CFG['epochs'], steps_per_epoch=len(train_loader))
        
    # Training and validation currently use identical cross-entropy losses.
    loss_tr = nn.CrossEntropyLoss().to(device) #MyCrossEntropyLoss().to(device)
    loss_fn = nn.CrossEntropyLoss().to(device)
        
    for epoch in range(CFG['epochs']):
        # schd_batch_update=False: the scheduler is stepped once, at the end of the epoch.
        train_one_epoch(epoch, model, loss_tr, optimizer, train_loader, device, scheduler=scheduler, schd_batch_update=False)

        # scheduler=None here so that validation does not step the scheduler a second time.
        with torch.no_grad():
                valid_one_epoch(epoch, model, loss_fn, val_loader, device, scheduler=None, schd_loss_update=False)

        # One checkpoint per epoch, named "<model_arch>_epoch_<epoch>" in the working directory.
        torch.save(model.state_dict(),'{}_epoch_{}'.format(CFG['model_arch'], epoch))
            
    #torch.save(model.cnn_model.state_dict(),'{}/cnn_model_fold_{}_{}'.format(CFG['model_path'], fold, CFG['tag']))
    # Drop the references to the training objects, then ask PyTorch to empty its CUDA cache.
    del model, optimizer, train_loader, val_loader, scaler, scheduler
    torch.cuda.empty_cache()

The training is over. The best accuracy we got was on epoch 75

# Inference

I made sure to use the test dataset only once, at the very end. After all the experiments, the model that performed best on the validation dataset was used to predict the test images' labels.
<br>We write the inference one epoch function. 

In [42]:
def inference_one_epoch(model, data_loader, device):
    """Return softmax outputs of ``model`` for every batch in ``data_loader``.

    Args:
        model: Network called as ``model(imgs)``; switched to eval mode.
        data_loader: Sized iterable yielding image batches only (no labels).
        device: Device the batches are moved to.

    Returns:
        A NumPy array formed by concatenating the per-batch softmax outputs
        (softmax taken over dim 1) along the first axis.
    """
    model.eval()

    batch_probs = []
    # Disable autograd here so the function does not depend on the caller
    # wrapping it in no_grad.
    with torch.no_grad():
        for imgs in tqdm(data_loader, total=len(data_loader)):
            imgs = imgs.to(device).float()
            logits = model(imgs)
            batch_probs.append(torch.softmax(logits, 1).detach().cpu().numpy())

    return np.concatenate(batch_probs)

We create the test_dataset and the test_dataloader. Then we load our previously trained model and predict for the test images.

In [34]:
from sklearn import metrics
if __name__ == '__main__':
    # Inference cell: rebuild the model, load the selected checkpoint and
    # predict class probabilities for the test split.

    seed_everything(CFG['seed'])

    # output_label=False: the dataset yields images only, as inference_one_epoch expects.
    test_ds = FlowerDataset(test, '../input/oxford102class/oxford-102-flowers/', transforms=get_valid_transforms(), output_label=False)

    tst_loader = torch.utils.data.DataLoader(
            test_ds,
            batch_size=CFG['valid_bs'],
            num_workers=CFG['num_workers'],
            shuffle=False,
            pin_memory=False,
        )

    device = torch.device(CFG['device'])
    # pretrained is left at its default (False): the weights come from the checkpoint.
    model = FlowerImgClassifier(CFG['model_arch'], train.label.nunique()).to(device)

    tst_preds = []

    # map_location makes a checkpoint saved from GPU tensors loadable on
    # whichever device CFG selects, including the CPU option.
    model.load_state_dict(torch.load('./tf_efficientnet_b4_ns_epoch_75', map_location=device))
    with torch.no_grad():
        tst_preds += [inference_one_epoch(model, tst_loader, device)]

In [35]:
# tst_preds is a list holding one prediction array, so the stacked array gains a leading axis.
np.array(tst_preds).shape

In [36]:
# Pick the most probable class along the last axis of the stacked predictions.
test_preds = np.asarray(tst_preds).argmax(axis=2)

In [37]:
# Shape of the predicted class indices after the argmax.
test_preds.shape

In [38]:
# Display the predicted class indices.
test_preds

In [39]:
# Same shape check as performed right after the argmax.
test_preds.shape

In [41]:
# Element-wise comparison of the true labels with the predictions; the mean of
# the boolean result is the fraction of correct predictions.
test_accuracy = (test.label.values == test_preds).mean()
print('test accuracy = {:.5f}'.format(test_accuracy))
        

The final test accuracy is 0.90259