# Classifier

In this first step we are going to train a classifier that decides whether or not a patch contains an airplane.

In [None]:
import pandas as pd
import numpy as np
import random 
import matplotlib

import warnings
warnings.filterwarnings('ignore')

from utils import *

%matplotlib inline

## Load data

First, we load the data we generated before.

In [None]:
# Root folder holding the generated patches and their annotation CSVs.
PATH = './dataset/patches_256_200'

train_csv = '{}/annotations_train.csv'.format(PATH)
eval_csv = '{}/annotations_eval.csv'.format(PATH)
df_t, df_v = pd.read_csv(train_csv), pd.read_csv(eval_csv)

# The CSVs store each patch's bounding boxes as a string; parse them back
# into lists, in place, for both splits.
for frame in (df_t, df_v):
    frame.annotations = anns_str2int(frame.annotations.values)

# Debug toggle: uncomment to shrink training to one row and validate on it.
#df_t = df_t[:1]
#df_v = df_t

df_t.sample(10)

We create a new column with a 1 if there are planes in the patch and 0 otherwise.

In [None]:
# Binary target: 1 when the patch carries at least one annotation, else 0.
df_t["label"] = [int(len(anns) > 0) for anns in df_t.annotations.values]
df_v["label"] = [int(len(anns) > 0) for anns in df_v.annotations.values]


def _add_root(img):
    """Return ``img`` prefixed with ``PATH``, unless it already is.

    The guard keeps this cell idempotent: re-running it no longer stacks the
    dataset folder onto names that were prefixed by a previous run.
    """
    img = str(img)
    return img if img.startswith(PATH + '/') else '{}/{}'.format(PATH, img)


# add path to image name for simplicity
df_t.img_name = [_add_root(img) for img in df_t.img_name.values]
df_v.img_name = [_add_root(img) for img in df_v.img_name.values]


df_t.sample(5)

In [None]:
# Report how many patches each split contains.
n_train, n_val = len(df_t), len(df_v)
print("Training patches: ", n_train)
print("Validation patches: ", n_val)

## Dataset

Now we define our Dataset, which will define how images and labels are passed to the network.

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset 

class MyDataset(Dataset):
    """Map-style dataset pairing image paths with their labels.

    Args:
        images: indexable collection of image paths.
        labels: indexable collection of labels, aligned with ``images``.
        transforms: optional callable invoked as ``transforms(image=img)``
            and expected to return a mapping with an ``'image'`` entry.
    """

    def __init__(self, images, labels, transforms=None):
        self.images, self.labels, self.transforms = images, labels, transforms

    def __len__(self):
        # The label collection defines how many samples there are.
        return len(self.labels)

    def __getitem__(self, ix):
        """Return ``(image_tensor, label)`` for the sample at position ``ix``."""
        picture = open_image(self.images[ix])
        target = self.labels[ix]
        if self.transforms:
            picture = self.transforms(image=picture)['image']
        # Move channels first, cast to float32 and divide by 255.
        chw = picture.transpose((2,0,1)).astype(np.float32)/255
        return torch.from_numpy(chw), target

We can use data augmentation.

In [None]:
from albumentations import (
    Compose, Resize, HorizontalFlip, VerticalFlip, Transpose, RandomRotate90, HueSaturationValue, RandomBrightness, GaussNoise
)

# Training pipeline: resize first, then the random augmentations in order.
train_steps = [
    Resize(224,224),
    HorizontalFlip(),
    VerticalFlip(),
    Transpose(),
    RandomRotate90(),
    HueSaturationValue(),
    RandomBrightness(),
    GaussNoise(),
]

# Validation images are only resized.
trans = {'train': Compose(train_steps), 'val': Resize(224, 224)}

#trans = None

# One MyDataset per split, each wired to its own transform.
dataset = {
    split: MyDataset(frame.img_name.values, frame.label.values, trans[split])
    for split, frame in (('train', df_t), ('val', df_v))
}

In [None]:
# Preview a 3x4 grid of randomly drawn samples from the training dataset.

ds = dataset['train']

fig, axs = plt.subplots(3, 4, figsize=(15,10))
for ax in axs.flat:
    sample_ix = random.randint(0, len(ds)-1)
    img, label = ds[sample_ix]

    # Undo the dataset's channels-first layout before handing it to show_image.
    hwc = img.numpy().transpose((1, 2, 0))

    ax = show_image(hwc, ax=ax)
    ax.set_title(label)

## Model

Here we define our model. We will use a pretrained ResNet34 as the backbone network and replace only its last layer to adapt it to our problem.



In [None]:
import torchvision

class Net(nn.Module):
    """ResNet34 classifier whose final fully connected layer is replaced.

    Args:
        num_classes: number of outputs of the new final layer.
        pretrained: when True (the default) the backbone is initialised from
            the weights torchvision publishes for ResNet34, which are
            downloaded on first use; when False it is randomly initialised.
    """

    def __init__(self, num_classes=2, pretrained=True):
        super(Net, self).__init__()
        # Calling resnet34() with no arguments builds a randomly initialised
        # network, so the pretrained weights must be requested explicitly.
        self.model = torchvision.models.resnet34(pretrained=pretrained)
        # set new fc layer with our classes
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        """Return the backbone's output for the batch ``x``."""
        return self.model(x)

In [None]:
# Pick the first CUDA device when one is available, otherwise use the CPU.

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device) # should output cuda:0

In [None]:
# Build the model and move it to the selected device (Module.to returns the
# module itself, so this is the same object).
net = Net().to(device)

# Smoke test: push one random batch through the network.
test_input = torch.randn(16, 3, 224, 224)
output = net(test_input.to(device))
print(output.shape) # should output BATCH_SIZE x NUM_CLASSES

## Training

To train the network we need to define a DataLoader from our dataset so that we can feed the network with batches of images.

In [None]:
from torch.utils.data import DataLoader

# One loader per split; only the training split is shuffled.
dataloader = {
    split: DataLoader(dataset[split], batch_size=16, shuffle=(split == 'train'), num_workers=4)
    for split in ('train', 'val')
}

In [None]:
# Pull a single batch from the training loader and show its tensor shapes.
first_batch = next(iter(dataloader['train']))
imgs, labs = first_batch
print(imgs.shape, labs.shape)

Now we define the optimizer and loss function to train the network.

In [None]:
# Cross-entropy over the class logits, optimised with Adam on every
# parameter of the network.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=1e-3)

We also need an evaluation metric.

In [None]:
def accuracy(preds, labels):
    """Return the fraction of rows in ``preds`` whose arg-max equals the label.

    Args:
        preds: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor of class indices, one per sample.
    """
    predicted = preds.data.max(dim=1).indices
    batch_size = labels.size(0)
    hits = (predicted == labels).sum().item()
    return hits / batch_size

Finally, we can proceed with the training.

In [None]:
def train(model, dataloader, criterion, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Puts ``model`` in training mode, performs one optimizer step per batch
    and returns the mean of the per-batch loss values.
    """
    print('Training ...')
    model.train()
    batch_losses = []
    for batch_imgs, batch_labels in tqdm(dataloader, ascii=True):
        batch_imgs = batch_imgs.to(device)
        batch_labels = batch_labels.to(device)
        loss = criterion(model(batch_imgs), batch_labels)
        batch_losses.append(loss.item())
        # Clear old gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return np.mean(batch_losses)

def test(model, dataloader, criterion, metric):
    """Evaluate ``model`` on ``dataloader`` without computing gradients.

    Args:
        model: network to evaluate; it is switched to eval mode.
        dataloader: iterable yielding ``(imgs, labels)`` batches.
        criterion: callable ``(outputs, labels) -> loss tensor``.
        metric: callable ``(outputs, labels) -> number``.

    Returns:
        ``(loss, metric_value)``, each averaged over the batches and weighted
        by batch size. Both are NaN when the loader yields no batches.

    Per-batch values are weighted by the number of samples in the batch so a
    smaller final batch does not count as much as a full one. This assumes
    ``criterion`` and ``metric`` each return a per-batch mean, as the
    default-constructed ``nn.CrossEntropyLoss`` and ``accuracy`` in this
    notebook do.
    """
    print('Evaluating ...')
    model.eval()
    losses, acc, sizes = [], [], []
    with torch.no_grad():
        for imgs, labels in tqdm(dataloader, ascii=True):
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            losses.append(loss.item())
            acc.append(metric(outputs, labels))
            sizes.append(labels.size(0))
    if sum(sizes) == 0:
        # np.average cannot normalise zero total weight; report NaN instead.
        return float('nan'), float('nan')
    return np.average(losses, weights=sizes), np.average(acc, weights=sizes)

In [None]:
# Train for a fixed number of epochs and checkpoint on validation accuracy.

EPOCHS = 30
train_loss, val_loss, acc = [], [], []
best_acc = 0
for epoch in range(EPOCHS):

    print('Epoch: {}/{}'.format(epoch+1, EPOCHS))

    # One pass over the training split.
    t_loss = train(net, dataloader['train'], criterion, optimizer)
    train_loss.append(t_loss)

    # Score the current weights on the validation split.
    v_loss, v_acc = test(net, dataloader['val'], criterion, accuracy)
    val_loss.append(v_loss)
    acc.append(v_acc)

    summary = 'Train Loss: {:.5f}. Val Loss: {:.5f}. Val acc: {:.5f}'.format(t_loss, v_loss, v_acc)
    print(summary)

    # Save the weights whenever validation accuracy strictly improves.
    improved = v_acc > best_acc
    if improved:
        best_acc = v_acc
        torch.save(net.state_dict(), './state_dict.pth')
        print('Best acc {}, model saved'.format(best_acc))

print('Best acc {}'.format(best_acc))

Visualize the training profile.

In [None]:
# Two stacked panels sharing the epoch axis: losses on top, accuracy below.
matplotlib.rcParams.update({'font.size': 16})
f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(8, 8))
plt.subplots_adjust(hspace=0.3)
ax1.plot(train_loss, linewidth=3, label='train')
ax1.plot(val_loss, ':', linewidth=3,  label='val')
ax1.set_title("Loss")
ax1.legend(loc='upper right')
ax1.grid()
# The legend entry doubles as an annotation showing the best accuracy reached.
ax2.plot(acc, linewidth=3, label="max: {:.4f}".format(np.array(acc).max()))
ax2.set_title("Accuracy")
ax2.grid()
# 'bottom right' is not a valid matplotlib location string (recent versions
# raise ValueError); the vertical keywords are 'upper' / 'lower' / 'center'.
ax2.legend(loc='lower right',handlelength=0, handletextpad=0, fancybox=True)
ax2.set_xlabel("epoch")
plt.show()

## Test

Load the best model and make some predictions.

In [None]:
# Restore the best checkpoint written during training. map_location remaps
# the stored tensors onto the current device, so a checkpoint saved on a GPU
# can still be loaded when only the CPU is available.
net.load_state_dict(torch.load('state_dict.pth', map_location=device))
# Switch to eval mode; the trailing ';' suppresses the module repr output.
net.eval();

In [None]:
# Show a 3x4 grid of random validation samples with ground truth vs. prediction.

ds = dataset['val']

fig, axs = plt.subplots(3, 4, figsize=(15,10))
for ax in axs.flat:

    ix = random.randint(0, len(ds)-1)
    img, label = ds[ix]

    # Inference only: no_grad avoids building an autograd graph per image.
    with torch.no_grad():
        preds = net(img.unsqueeze(0).to(device)).squeeze(0)
    # .item() turns the 0-d tensor into a plain int, so the title reads
    # "Pred: 1" rather than "Pred: tensor(1, device='cuda:0')".
    pred_label = torch.argmax(preds, dim=0).item()

    # bring back image from tensor
    img = img.numpy().transpose((1, 2, 0))

    ax = show_image(img, ax=ax)
    # Green title when the prediction matches the ground truth, red otherwise.
    ax.set_title("GT: {} / Pred: {}".format(label, pred_label), color = "green" if label == pred_label else "red")