INSTALL IMPORTANT DEPENDENCIES

In [1]:
import numpy as np  #linear algebra
import pandas as pd # data processing, CSV file I/) (e.g. pd.read_cvs)
import os

In [2]:
import torch
import torchvision
from torchvision import datasets as datasets
from torchvision import transforms as T # for simplifying the transforms
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, sampler, random_split
from torchvision import models

In [3]:
import timm
from timm.loss import LabelSmoothingCrossEntropy # This is better than normal nn.CrossEntropyLoss

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# remove warnings
import warnings
warnings.filterwarnings("ignore")

In [5]:
import matplotlib.pyplot as plt
%matplotlib inline

In [6]:
import sys
from tqdm import tqdm
import time
import copy

In [7]:
def get_classes(data_dir):
    all_data = datasets.ImageFolder(data_dir)
    return all_data.classes

In [8]:
def get_data_loaders(data_dir, batch_size, train = False):
    if train:
        #train
        transform = T.Compose([   # augmentation transformations occurs here (random horizontal and vertical flips, random color jitter, resizing, center cropping, converting to tensor, normalization (using ImageNet means and standard deviations), and random erasing for regularization.)
            T.RandomHorizontalFlip(),
            T.RandomVerticalFlip(),
            T.RandomApply(torch.nn.ModuleList([T.ColorJitter()]), p=0.25),
            T.Resize(256),
            T.CenterCrop(224),
            T.ToTensor(),
            T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), # imagenet means
            T.RandomErasing(p=0.2, value='random')
        ])
        # train_data = datasets.ImageFolder(os.path.join(data_dir, "train/"), transform = transform)
        train_data = datasets.ImageFolder(os.path.join(data_dir, "train/"), transform = transform)
        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
        return train_loader, len(train_data)
    else:
        # val/test
        transform = T.Compose([ # We dont need augmentation for test transforms
            T.Resize(256),
            T.CenterCrop(224),
            T.ToTensor(),
            T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), # imagenet means
        ])
        val_data = datasets.ImageFolder(os.path.join(data_dir, "valid/"), transform=transform)
        test_data = datasets.ImageFolder(os.path.join(data_dir, "test/"), transform=transform)
        val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True, num_workers=4)
        test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, num_workers=4)
        return val_loader, test_loader, len(val_data), len(test_data)

In [9]:
dataset_path = "input/"

In [10]:
(train_loader, train_data_len) = get_data_loaders(dataset_path, 128, train=True)
(val_loader, test_loader, valid_data_len, test_data_len) = get_data_loaders(dataset_path, 32, train=False)


In [11]:
classes = get_classes("input/train/")
print(classes, len(classes))

['Bear', 'Bird', 'Cat', 'Cow', 'Deer', 'Dog', 'Dolphin', 'Elephant', 'Giraffe', 'Horse', 'Kangaroo', 'Lion', 'Panda', 'Tiger', 'Zebra'] 15


In [12]:
dataloaders = {
    "train": train_loader,
    "val": val_loader
}
dataset_sizes = {
    "train": train_data_len,
    "val": valid_data_len
}

In [13]:
print(len(train_loader), len(val_loader), len(test_loader))

13 6 6


In [14]:
print(train_data_len, valid_data_len, test_data_len)

1576 184 184


In [15]:
# now, for the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [17]:
model = torch.hub.load('facebookresearch/deit:main', 'deit_tiny_patch16_224', pretrained=True)

Downloading: "https://github.com/facebookresearch/deit/zipball/main" to C:\Users\Dell 5480/.cache\torch\hub\main.zip
Downloading: "https://dl.fbaipublicfiles.com/deit/deit_tiny_patch16_224-a1311bcf.pth" to C:\Users\Dell 5480/.cache\torch\hub\checkpoints\deit_tiny_patch16_224-a1311bcf.pth
100%|██████████| 21.9M/21.9M [02:31<00:00, 151kB/s] 


In [18]:
for param in model.parameters(): #freeze model
    param.requires_grad = False

n_inputs = model.head.in_features
model.head = nn.Sequential(
    nn.Linear(n_inputs, 512),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(512, len(classes))
)
model = model.to(device)
print(model.head)

Sequential(
  (0): Linear(in_features=192, out_features=512, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.3, inplace=False)
  (3): Linear(in_features=512, out_features=15, bias=True)
)


In [19]:
criterion = LabelSmoothingCrossEntropy() #deals with noisy or uncertain labels in the training data (regularization technique)
criterion = criterion.to(device)  #moves the criterion to the specified device (GPU or CPU)
optimizer = optim.Adam(model.head.parameters(), lr=0.001)     # used for updating the model parameters.   

In [20]:
# lr scheduler
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.97)  #The learning rate scheduler.

In [21]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=10):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print("-"*10)
        
        for phase in ['train', 'val']: # We do training and validation phase per epoch
            if phase == 'train':
                model.train() # model to training mode
            else:
                model.eval() # model to evaluate
            
            running_loss = 0.0
            running_corrects = 0.0
            
            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase == 'train'): # no autograd makes validation go faster
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1) # used for accuracy
                    loss = criterion(outputs, labels)
                    
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                
            if phase == 'train':
                scheduler.step() # step at end of epoch
            
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc =  running_corrects.double() / dataset_sizes[phase]
            
            print("{} Loss: {:.4f} Acc: {:.4f}".format(phase, epoch_loss, epoch_acc))
            
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict()) # keep the best validation accuracy model
        print()
    time_elapsed = time.time() - since # slight error
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print("Best Val Acc: {:.4f}".format(best_acc))
    
    model.load_state_dict(best_model_wts)
    
    
    
    return model

In [22]:
model_ft = train_model(model, criterion, optimizer, exp_lr_scheduler) # now it is a lot faster
# I will come back after 10 epochs

Epoch 0/9
----------


100%|██████████| 13/13 [02:38<00:00, 12.21s/it]


train Loss: 1.8293 Acc: 0.6320


100%|██████████| 6/6 [00:25<00:00,  4.17s/it]


val Loss: 0.8931 Acc: 0.9348

Epoch 1/9
----------


100%|██████████| 13/13 [02:23<00:00, 11.03s/it]


train Loss: 0.9325 Acc: 0.8991


100%|██████████| 6/6 [00:34<00:00,  5.78s/it]


val Loss: 0.7516 Acc: 0.9674

Epoch 2/9
----------


100%|██████████| 13/13 [02:21<00:00, 10.87s/it]


train Loss: 0.8240 Acc: 0.9435


100%|██████████| 6/6 [00:24<00:00,  4.15s/it]


val Loss: 0.7157 Acc: 0.9728

Epoch 3/9
----------


100%|██████████| 13/13 [02:08<00:00,  9.85s/it]


train Loss: 0.7731 Acc: 0.9562


100%|██████████| 6/6 [00:22<00:00,  3.73s/it]


val Loss: 0.6779 Acc: 0.9837

Epoch 4/9
----------


100%|██████████| 13/13 [02:08<00:00,  9.91s/it]


train Loss: 0.7306 Acc: 0.9657


100%|██████████| 6/6 [00:23<00:00,  3.93s/it]


val Loss: 0.6508 Acc: 0.9891

Epoch 5/9
----------


100%|██████████| 13/13 [02:26<00:00, 11.25s/it]


train Loss: 0.6956 Acc: 0.9791


100%|██████████| 6/6 [00:28<00:00,  4.70s/it]


val Loss: 0.6465 Acc: 0.9891

Epoch 6/9
----------


100%|██████████| 13/13 [02:34<00:00, 11.88s/it]


train Loss: 0.6948 Acc: 0.9841


100%|██████████| 6/6 [00:29<00:00,  4.91s/it]


val Loss: 0.6357 Acc: 0.9946

Epoch 7/9
----------


100%|██████████| 13/13 [02:35<00:00, 11.99s/it]


train Loss: 0.6786 Acc: 0.9835


100%|██████████| 6/6 [00:28<00:00,  4.67s/it]


val Loss: 0.6275 Acc: 0.9946

Epoch 8/9
----------


100%|██████████| 13/13 [02:40<00:00, 12.33s/it]


train Loss: 0.6664 Acc: 0.9911


100%|██████████| 6/6 [00:28<00:00,  4.72s/it]


val Loss: 0.6251 Acc: 0.9946

Epoch 9/9
----------


100%|██████████| 13/13 [02:30<00:00, 11.54s/it]


train Loss: 0.6631 Acc: 0.9892


100%|██████████| 6/6 [00:24<00:00,  4.00s/it]

val Loss: 0.6211 Acc: 0.9946

Training complete in 28m 57s
Best Val Acc: 0.9946





TESTING:

Ok, now we finished training. Lets run the dataset on the test loader and calculate accuracy

In [24]:
test_loss = 0.0
class_correct = list(0 for i in range(len(classes)))
class_total = list(0 for i in range(len(classes)))
model_ft.eval()

for data, target in tqdm(test_loader):
    data, target = data.to(device), target.to(device)
    with torch.no_grad(): # turn off autograd for faster testing
        output = model_ft(data)
        loss = criterion(output, target)
    test_loss = loss.item() * data.size(0)
    _, pred = torch.max(output, 1)
    correct_tensor = pred.eq(target.data.view_as(pred))
    correct = np.squeeze(correct_tensor.cpu().numpy())
    if len(target) == 32:
        for i in range(32):
            label = target.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1

test_loss = test_loss / test_data_len
print('Test Loss: {:.4f}'.format(test_loss))
for i in range(len(classes)):
    if class_total[i] > 0:
        print("Test Accuracy of %5s: %2d%% (%2d/%2d)" % (
            classes[i], 100*class_correct[i]/class_total[i], np.sum(class_correct[i]), np.sum(class_total[i])
        ))
    else:
        print("Test accuracy of %5s: NA" % (classes[i]))
print("Test Accuracy of %2d%% (%2d/%2d)" % (
            100*np.sum(class_correct)/np.sum(class_total), np.sum(class_correct), np.sum(class_total)
        ))

100%|██████████| 6/6 [00:34<00:00,  5.72s/it]

Test Loss: 0.0795
Test Accuracy of  Bear: 100% (12/12)
Test Accuracy of  Bird: 100% (11/11)
Test Accuracy of   Cat: 100% (11/11)
Test Accuracy of   Cow: 100% (12/12)
Test Accuracy of  Deer: 100% (10/10)
Test Accuracy of   Dog: 100% (10/10)
Test Accuracy of Dolphin: 100% ( 6/ 6)
Test Accuracy of Elephant: 100% (11/11)
Test Accuracy of Giraffe: 100% (11/11)
Test Accuracy of Horse: 100% ( 8/ 8)
Test Accuracy of Kangaroo: 100% (11/11)
Test Accuracy of  Lion: 100% (11/11)
Test Accuracy of Panda: 100% (13/13)
Test Accuracy of Tiger: 100% (11/11)
Test Accuracy of Zebra: 100% (12/12)
Test Accuracy of 100% (160/160)





In [25]:
# our model earns 93% test accuracy, which is very high. lets save it
example = torch.rand(1, 3, 224, 224)
traced_script_module = torch.jit.trace(model.cpu(), example)
traced_script_module.save("animal_classifier.pt")