In [82]:
import os
import random
import pandas as pd
import numpy as np
from PIL import Image

import torch
import timm
from torch import nn, optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from ranger import Ranger
#https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer
import torch.nn.functional as F
from torch.cuda.amp import autocast, GradScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics
from albumentations import (HueSaturationValue, IAAAdditiveGaussianNoise, MedianBlur, RandomBrightnessContrast, Compose, Cutout, CoarseDropout)
from albumentations.pytorch import ToTensorV2

import logging
logging.getLogger().setLevel(logging.INFO)

import warnings
warnings.filterwarnings('ignore')


In [83]:
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    logging.info(f"Running on {torch.cuda.get_device_name()}")
else:
    device = torch.device("cpu")
    logging.info("Running on a CPU")
    

INFO:root:Running on NVIDIA GeForce GTX 1060 6GB


In [84]:
#seed everything for reproducibility
seed = 55555

random.seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


In [85]:
DATA_PATH = "./data/"

In [86]:
from matplotlib import pyplot as plt
%matplotlib inline

In [212]:
images = []
labels = []
for image_name in os.listdir(DATA_PATH):
    with open(DATA_PATH+image_name, 'rb') as f:
        image = Image.open(f)
        image = np.array(image)

        if image.shape[-1] == 4: #removing alpha channel
            image = image[..., :3]

        image = np.rollaxis(image, -1, 0)[0]
        image[image>50] = 255.0
        image[image<50] = 0.0

        if len(image.shape) == 2: #converting single channel image to 3 channel
            image = np.stack((image,)  *3, axis = -1)       

        label = image_name[:-4]  

        images.append(image)
        labels.append(label)

In [213]:
dataset = pd.DataFrame(columns=["Images", "Labels"])
dataset["Images"] = images
dataset["Labels"] = labels

In [214]:
#create training, validation and test dataframes

train_df, valid_df = train_test_split(dataset, test_size=0.1, random_state=seed, shuffle=True)
valid_df, test_df = train_test_split(valid_df, test_size=0.33, random_state=seed, shuffle=True)

In [87]:
#hyperparameters
image_height = 50
image_width = 180
batch_size = 20
gradient_accumulation_steps = 10
learning_rate = 0.001
epochs = 100
label_counts = 10


In [216]:
#custom dataset class that augments data during training

class Dataset(torch.utils.data.Dataset):
    def __init__(self, df, mode="train", transforms=None):
        super().__init__()
        self.df_data = df.values
        self.transforms = transforms
        self.mode = mode

    def __len__(self):
        return len(self.df_data)

    def __getitem__(self, index):
        image, labels = self.df_data[index]

        if self.transforms is not None:
            image = self.transforms(image=image)["image"]

        labels = torch.tensor([int(i) for i in labels])

        return image, labels

#data augmentation used both for training and validation
#When and Why Test-Time Augmentation Works:
#https://arxiv.org/abs/2011.11156

transform = Compose([
            MedianBlur(blur_limit = 7, always_apply = False, p = 0.3),
            IAAAdditiveGaussianNoise(scale = (0, 0.15 * 255), p = 0.5),
            HueSaturationValue(hue_shift_limit = 0.2, sat_shift_limit = 0.2, val_shift_limit = 0.2, p = 0.4),
            RandomBrightnessContrast(brightness_limit = (-0.1, 0.1), contrast_limit = (-0.1, 0.1), p = 0.5),
            CoarseDropout(p = 0.1),
            Cutout(p = 0.4),
            ToTensorV2(p = 1.0),
        ], p = 1.)

test_transform = Compose([
            ToTensorV2(p = 1.0),
        ], p = 1.0)

train_dataset = Dataset(train_df, transforms = transform)
valid_dataset = Dataset(valid_df, transforms = transform)
test_dataset = Dataset(test_df, transforms = test_transform)


In [217]:
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler
from catalyst.data.sampler import BalanceClassSampler

train_dataloader = DataLoader(
            train_dataset,
            #balanced sampler is used to minimize harmful effects of dataset not being fully balanced
            sampler=RandomSampler(train_dataset),
            batch_size = batch_size)
valid_dataloader = DataLoader(
            valid_dataset,
            sampler = SequentialSampler(valid_dataset),
            batch_size = batch_size)
test_dataloader = DataLoader(
            test_dataset,
            sampler = SequentialSampler(test_dataset),
            batch_size = 1)


In [88]:
class EnClassifier(nn.Module):
    def __init__(self, model_arch, n_class, pretrained=True):
        super().__init__()
        self.model = timm.create_model(model_arch, pretrained=pretrained)
        n_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(n_features, 4096)
        self.classifier1 = nn.Linear(4096, n_class)
        self.classifier2 = nn.Linear(4096, n_class)
        self.classifier3 = nn.Linear(4096, n_class)
        self.classifier4 = nn.Linear(4096, n_class)
        self.classifier5 = nn.Linear(4096, n_class)
        
    def forward(self, x):
        x = self.model(x)
        x1 = self.classifier1(x)
        x2 = self.classifier2(x)
        x3 = self.classifier3(x)
        x4 = self.classifier4(x)
        x5 = self.classifier5(x)
        output = torch.cat((x1, x2, x3, x4, x5), dim=-1)
        output = torch.reshape(output, (output.size()[0], 5, 10))

        return output

net = EnClassifier("tf_efficientnet_b0_ns", 10).to(device)


In [219]:
import copy
loss_function = nn.CrossEntropyLoss()
#Bi-Tempered Logistic Loss for Training Neural Nets with Noisy Data:
#https://ai.googleblog.com/2019/08/bi-tempered-logistic-loss-for-training.html
#https://github.com/google/bi-tempered-loss
#provides increase in accuracy on noisy data
#optimizer = optim.AdamW(net.parameters(), lr = learning_rate, weight_decay = 0.9, betas = (0.9, 0.999))
optimizer = Ranger(net.parameters(), lr = learning_rate, weight_decay = 0.9, betas = (0.9, 0.999))
scheduler = CosineAnnealingLR(optimizer, epochs * 0.3, learning_rate * 0.0001)
scheduler.last_epoch = epochs
scaler = GradScaler()

def training_loop():
    best_model_wts = copy.deepcopy(net.state_dict())
    best_loss = 100
    
    for epoch in (range(epochs)):
        if epoch != 0 and epoch > 0.3 * epochs:
            scheduler.step()
        logging.info(f"Epoch {epoch+1}")

        logging.info("Training")
        train_losses, train_accuracies = forward_pass(train_dataloader, train = True)  

        logging.info("Validating")
        val_losses, val_accuracies = forward_pass(valid_dataloader)        

        logging.info(f"Training accuracy:   {sum(train_accuracies)/len(train_accuracies)} | Training loss: {sum(train_losses)/len(train_losses)}")
        logging.info(f"Validation accuracy: {sum(val_accuracies)/len(val_accuracies)} | Validation loss: {sum(val_losses)/len(val_losses)}")

        if epoch == epochs:
            net.load_state_dict(torch.load("best.pth"))
            net.eval()
            logging.info("Testing")
            test_losses, test_accuracies = forward_pass(test_dataloader)
            logging.info(f"Test accuracy: {sum(test_accuracies)/len(test_accuracies)} | Test loss: {sum(test_losses)/len(test_losses)}")
        
        epoch_val_loss = sum(val_losses)/len(val_losses)
        
        if best_loss > epoch_val_loss:    
            best_loss = epoch_val_loss
            best_model_wts = copy.deepcopy(net.state_dict())
            torch.save(net.state_dict(), "best.pth")
            logging.info(f"Saving with loss of {epoch_val_loss}, improved over previous {best_loss}")


Ranger optimizer loaded. 
Gradient Centralization usage = True
GC applied to both conv and fc layers


In [220]:
def get_loss(outputs, labels):
        losses = []
        for i in range(len(outputs)):
            loss = loss_function(outputs[i], labels[i])
            losses.append(loss)
        final_loss = sum(losses)

        return final_loss

def get_matches(outputs, labels):
    matches = []
    for i in range(len(outputs)):
        is_match = False
        for j in range(len(outputs[i])):
            outputs_argmax = [torch.argmax(j).item() for j in outputs[i]]
            labels_list = labels[i].tolist()
            if outputs_argmax == labels_list:
                is_match = True
        matches.append(is_match)

        return matches

def forward_pass(dataloader, train=False):
    if train:
        net.train()
    else:
        net.eval()

    losses = []
    accuracies = []

    for step, batch in (enumerate(dataloader)):
        inputs = batch[0].to(device).float()
        labels = batch[1].to(device).long()

        with autocast():
            if train:
                outputs = net(inputs)
                loss = get_loss(outputs, labels)
                scaler.scale(loss).backward()
            else:
                with torch.no_grad():
                    outputs = net(inputs)
                    loss = get_loss(outputs, labels)

        matches = get_matches(outputs, labels)               
        acc = matches.count(True)/len(matches)                               
        losses.append(loss)
        accuracies.append(acc)
        
        if train and (step+1) % gradient_accumulation_steps == 0:
        #gradient accumulation to train with bigger effective batch size 
        #with less memory use
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()            
            logging.info(f"Step {step} of {len(train_dataloader)}, Accuracy: {sum(accuracies)/len(accuracies)}, Loss: {sum(losses)/len(losses)}")    
    return losses, accuracies


In [221]:
training_loop()

INFO:root:Epoch 1
INFO:root:Training
INFO:root:Step 9 of 140, Accuracy: 0.0, Loss: 46.149169921875
INFO:root:Step 19 of 140, Accuracy: 0.0, Loss: 46.13117980957031
INFO:root:Step 29 of 140, Accuracy: 0.0, Loss: 46.10929870605469
INFO:root:Step 39 of 140, Accuracy: 0.0, Loss: 46.10810852050781
INFO:root:Step 49 of 140, Accuracy: 0.0, Loss: 46.11796951293945
INFO:root:Step 59 of 140, Accuracy: 0.0, Loss: 46.10346984863281
INFO:root:Step 69 of 140, Accuracy: 0.0, Loss: 46.04608154296875
INFO:root:Step 79 of 140, Accuracy: 0.0, Loss: 46.00121307373047
INFO:root:Step 89 of 140, Accuracy: 0.0, Loss: 45.959190368652344
INFO:root:Step 99 of 140, Accuracy: 0.0, Loss: 45.9083137512207
INFO:root:Step 109 of 140, Accuracy: 0.0, Loss: 45.89010238647461
INFO:root:Step 119 of 140, Accuracy: 0.0, Loss: 45.87166976928711
INFO:root:Step 129 of 140, Accuracy: 0.0, Loss: 45.85173416137695
INFO:root:Step 139 of 140, Accuracy: 0.0, Loss: 45.635353088378906
INFO:root:Validating
INFO:root:Training accuracy:  

In [26]:
net.load_state_dict(torch.load("best.pth"))
net.eval()
logging.info("Testing")
test_losses, test_accuracies = forward_pass(test_dataloader)
logging.info(f"Test accuracy: {sum(test_accuracies)/len(test_accuracies)} | Test loss: {sum(test_losses)/len(test_losses)}")

INFO:root:Testing
INFO:root:Test accuracy: 1.0 | Test loss: 0.00047525743138976395
