## Pre-train the find-grained classifers using Pytorch

The fine-grained classifier, designed for plug-and-play, is made up of a deep residual network ResNet34, an AdaptiveAvgPool2d for dimensional reduction, and a final linear layer for prediction.

In [17]:
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms

import time
import os
import PIL.Image as Image
from IPython.display import display

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


## Load the data and transform

First, lets create some transforms for our data and load the train/test data+labels from the folders.

Here we are using images with random horizontal flip, random rotation and normalization

In [23]:

train_tfms = transforms.Compose([transforms.Resize((400, 400)),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.RandomRotation(15),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
test_tfms = transforms.Compose([transforms.Resize((400, 400)),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# car_classifer
# dataset_dir = "../input/car_data/car_data/"
# dataset = torchvision.datasets.ImageFolder(root=dataset_dir+"train", transform = train_tfms)
# dataset2 = torchvision.datasets.ImageFolder(root=dataset_dir+"test", transform = test_tfms)

# bird_classifer
# dataset_dir = "../input/100-bird-species/"
# dataset = torchvision.datasets.ImageFolder(root=dataset_dir+"train", transform = train_tfms)
# dataset2 = torchvision.datasets.ImageFolder(root=dataset_dir+"test", transform = test_tfms)

# dog_classifer
dataset_dir = "../input/dog-breed-classification/"
dataset = torchvision.datasets.ImageFolder(root=dataset_dir+"dog/train", transform = train_tfms)
dataset2 = torchvision.datasets.ImageFolder(root=dataset_dir+"dog/test", transform = test_tfms)

# cat_classifer67
# dataset_dir = "../input/cat-breed-classification67-mini/train_valid_test/"
# dataset = torchvision.datasets.ImageFolder(root=dataset_dir+"train", transform = train_tfms)
# dataset2 = torchvision.datasets.ImageFolder(root=dataset_dir+"test", transform = test_tfms)

# human_age_classifer
# dataset_dir = "../input/age-stage-identification/new/"
# dataset = torchvision.datasets.ImageFolder(root=dataset_dir+"train", transform = train_tfms)
# dataset2 = torchvision.datasets.ImageFolder(root=dataset_dir+"test", transform = test_tfms)

# facial_expression_classifer
# dataset_dir = "../input/mma-facial-expression/MMAFEDB/"
# dataset = torchvision.datasets.ImageFolder(root=dataset_dir+"test", transform = train_tfms)
# dataset2 = torchvision.datasets.ImageFolder(root=dataset_dir+"test", transform = test_tfms)

trainloader = torch.utils.data.DataLoader(dataset, batch_size = 32, shuffle=True, num_workers = 2)
testloader = torch.utils.data.DataLoader(dataset2, batch_size = 32, shuffle=False, num_workers = 2)


## Model training function

Here we train our model, after each epoch, we test the model on the test data to see how it's going

In [24]:
def train_model(model, criterion, optimizer, scheduler, n_epochs = 1):
    
    losses = []
    accuracies = []
    test_accuracies = []
    # set the model to train mode initially
    model.train()
    for epoch in range(n_epochs):
        since = time.time()
        running_loss = 0.0
        running_correct = 0.0
        for i, data in enumerate(trainloader, 0):

            # get the inputs and assign them to cuda
            inputs, labels = data
            #inputs = inputs.to(device).half() # uncomment for half precision model
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            
            # forward + backward + optimize
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            # calculate the loss/acc later
            running_loss += loss.item()
            running_correct += (labels==predicted).sum().item()

        epoch_duration = time.time()-since
        epoch_loss = running_loss/len(trainloader)
        epoch_acc = 100/32*running_correct/len(trainloader)
        print("Epoch %s, duration: %d s, loss: %.4f, acc: %.4f" % (epoch+1, epoch_duration, epoch_loss, epoch_acc))
        
        losses.append(epoch_loss)
        accuracies.append(epoch_acc)
        
        # switch the model to eval mode to evaluate on test data
        model.eval()
        test_acc = eval_model(model)
        test_accuracies.append(test_acc)
        
        # re-set the model to train mode after validating
        model.train()
        scheduler.step(test_acc)
        since = time.time()
    print('Finished Training')
    return model, losses, accuracies, test_accuracies

    

### Evaluate on training data
This function is called out after each epoch of training on the training data. We then measure the accuracy of the model.

In [25]:
def eval_model(model):
    correct = 0.0
    total = 0.0
    with torch.no_grad():
        for i, data in enumerate(testloader, 0):
            images, labels = data
            #images = images.to(device).half() # uncomment for half precision model
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model_ft(images)
            _, predicted = torch.max(outputs.data, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_acc = 100.0 * correct / total
    print('Accuracy of the network on the test images: %d %%' % (
        test_acc))
    return test_acc

In [26]:
model_ft = models.resnet34(pretrained=True)
num_ftrs = model_ft.fc.in_features

# replace the last fc layer with an untrained one (requires grad by default)

# car_classification
# model_ft.fc = nn.Linear(num_ftrs, 196)

# bird_classification
# model_ft.fc = nn.Linear(num_ftrs, 450)

# dog_classification
model_ft.fc = nn.Linear(num_ftrs, 120)

# cat_classification67
# model_ft.fc = nn.Linear(num_ftrs, 67)

# human_age_classification
# model_ft.fc = nn.Linear(num_ftrs, 10)

# human_facial_expression
# model_ft.fc = nn.Linear(num_ftrs, 7)
model_ft = model_ft.to(device)
# print("model_ft:")
# print(model_ft)
# uncomment this block for half precision model
"""
model_ft = model_ft.half()


for layer in model_ft.modules():
    if isinstance(layer, nn.BatchNorm2d):
        layer.float()
"""
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)#before 0.01,momentum=1

"""
probably not the best metric to track, but we are tracking the training accuracy and measuring whether
it increases by atleast 0.9 per epoch and if it hasn't increased by 0.9 reduce the lr by 0.1x.
However in this model it did not benefit me.
"""
# lrscheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=3, threshold = 0.9)
lrscheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=3, threshold = 0.1,factor=0.1)

In [27]:
model_ft, training_losses, training_accs, test_accs = train_model(model_ft, criterion, optimizer, lrscheduler, n_epochs=20)

Epoch 1, duration: 132 s, loss: 2.0573, acc: 48.3724
Accuracy of the network on the test images: 55 %
Epoch 2, duration: 124 s, loss: 1.1496, acc: 65.7552
Accuracy of the network on the test images: 49 %
Epoch 3, duration: 136 s, loss: 0.8900, acc: 72.6454
Accuracy of the network on the test images: 57 %
Epoch 4, duration: 124 s, loss: 0.7393, acc: 77.0399
Accuracy of the network on the test images: 61 %
Epoch 5, duration: 124 s, loss: 0.6142, acc: 80.3168
Accuracy of the network on the test images: 63 %
Epoch 6, duration: 128 s, loss: 0.5033, acc: 83.7782
Accuracy of the network on the test images: 60 %
Epoch 7, duration: 124 s, loss: 0.4082, acc: 86.8598
Accuracy of the network on the test images: 66 %
Epoch 8, duration: 124 s, loss: 0.3464, acc: 88.8563
Accuracy of the network on the test images: 63 %
Epoch 9, duration: 125 s, loss: 0.1507, acc: 95.8116
Accuracy of the network on the test images: 76 %
Epoch 10, duration: 124 s, loss: 0.0894, acc: 97.9818
Accuracy of the network on t

### As we can see the model reached 90% training accuracy by epoch 10.


In [None]:
# plot the stats

f, axarr = plt.subplots(2,2, figsize = (12, 8))
axarr[0, 0].plot(training_losses)
axarr[0, 0].set_title("Training loss")
axarr[0, 1].plot(training_accs)
axarr[0, 1].set_title("Training acc")
axarr[1, 0].plot(test_accs)
axarr[1, 0].set_title("Test acc")

### Evaluate the model on single images (e.g for production)

Next we can use the model on our own images. For that we need to tie the class numbers for which the model returns probablities with the names of those classes.


In [None]:
# tie the class indices to their names

def find_classes(dir):
    classes = os.listdir(dir)
    classes.sort()
    class_to_idx = {classes[i]: i for i in range(len(classes))}
    return classes, class_to_idx
# car classification
# classes, c_to_idx = find_classes(dataset_dir+"train")

# bird classification
# classes, c_to_idx = find_classes(dataset_dir+"train")

# dog classification
classes, c_to_idx = find_classes(dataset_dir+"dog/train")

# cat classification67
# classes, c_to_idx = find_classes(dataset_dir+"train")

# human age classification
# classes, c_to_idx = find_classes(dataset_dir+"train")

In [None]:
# test the model on random images
# switch the model to evaluation mode to make dropout and batch norm work in eval mode
model_ft.eval()

# transforms for the input image
loader = transforms.Compose([transforms.Resize((400, 400)),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
image = Image.open(dataset_dir+"/test/affenpinscher/42404238339c427c0fb830da89885d5e.jpg")
image = loader(image).float()
image = torch.autograd.Variable(image, requires_grad=True)
image = image.unsqueeze(0)
image = image.cuda()
print("image shape="+image.shape)
output = model_ft(image)
print("output=")
print(output)
torch.save(model_ft.state_dict(), './identification_dog.pt')
conf, predicted = torch.max(output.data, 1)
print("conf="+cof)
print("predicted="+predicted)

In [None]:
# get the class name of the prediction

# car_classification
# display(Image.open(dataset_dir+"test/Mercedes-Benz C-Class Sedan 2012/01977.jpg"))

# bird_classification
# display(Image.open(dataset_dir+"test/ABBOTTS BABBLER/1.jpg"))

# cat_classification
# display(Image.open(dataset_dir+"cat/test/American Bobtail/AmericanBobtail-13611304_5083.jpg"))

# dog_classification
# display(Image.open(dataset_dir+"dog/test/afghan_hound/08829c00da02dea80eb491122989492f.jpg"))
# human_age_classification
display(Image.open(dataset_dir+"/test/affenpinscher/42404238339c427c0fb830da89885d5e.jpg"))
print(classes[predicted.item()], "confidence: ", conf.item())

In [None]:
import os
os.chdir('/kaggle/working')
print(os.getcwd())
print(os.listdir('/kaggle/working'))
from IPython.display import FileLink
FileLink('identification_dog.pt')