<a href="https://colab.research.google.com/github/Fortune-Adekogbe/Toyota-Models-Classification/blob/main/Notebooks/Toyota_Vehicle_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Toyota Vehicle Classification Project**



Setting up the Project Directory and checking its contents

In [None]:
# NOTE(review): the commented-out mkdir below names a different folder
# (FlowerClassificationData) than the one entered by %cd — likely a leftover.
#!mkdir /content/drive/MyDrive/Colab_Data/FlowerClassificationData
# Make the project folder the working directory so the relative paths used later
# ('data.jl', 'train/', 'models/') resolve against it.
# Presumably Google Drive is already mounted at /content/drive — TODO confirm.
%cd /content/drive/MyDrive/Colab_Data/curacel/

In [None]:
# List the current working directory to check its contents.
!ls

**Importing required libraries**

In [None]:
import os, sys, gc
import numpy as np
import pandas as pd
import random
import copy
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import *

from PIL import Image
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from random import shuffle 
from shutil import copyfile
%matplotlib inline

### Setting up the seeds for reproducibility

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU, the current
    CUDA device and all CUDA devices), exports ``PYTHONHASHSEED`` and sets
    the cuDNN ``deterministic`` flag.

    Args:
        seed (int): Value handed to each generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = True


seed_everything(21)

**Check the installed PyTorch version and whether a GPU is available**


In [None]:
# Report the installed PyTorch version, then whether a CUDA device is usable.
for status in (torch.__version__, torch.cuda.is_available()):
    print(status)

In [None]:
import json
import joblib

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code —
# only load a 'data.jl' that comes from a trusted source.
with open('data.jl', 'rb') as f:
    data = joblib.load(f)

# Map the 1-based position of each key in `data` (as a string) to that key.
cat_to_name = {
    str(position): name
    for position, name in enumerate(data.keys(), start=1)
}

In [None]:
# Number of categories found in data.jl
len(cat_to_name)

In [None]:
# directory holding the training images
train = "train/"
# number of samples loaded per batch
batch_size = 64
# fraction (0-1) of the training set held out for validation
valid_size = 0.2
# number of subprocesses used for data loading
num_workers = 0

In [None]:
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler

# Per-channel mean/std (the standard ImageNet statistics) shared by both pipelines
normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])

# Training pipeline: random augmentation, then conversion to a normalized torch.FloatTensor
transform = transforms.Compose([
    transforms.Resize(256),
    # ColorJitter() with its default arguments (all zeros) leaves the image unchanged,
    # so give it explicit ranges to actually vary brightness, contrast and saturation.
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize])

# Validation pipeline: deterministic (no random crop / flip / jitter) so the
# validation loss and accuracy are comparable from one epoch to the next
valid_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize])

# Two views of the same image folder: augmented for training, plain for validation.
# Both enumerate the same files in the same order, so one set of indices splits both.
train_data = datasets.ImageFolder(train, transform=transform)
valid_data = datasets.ImageFolder(train, transform=valid_transform)

# obtain training indices that will be used for validation
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# prepare data loaders (combine dataset and sampler)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=batch_size,
    sampler=valid_sampler, num_workers=num_workers)

In [None]:
# Fetch one training batch. The built-in next() is required here: DataLoader
# iterators on current PyTorch releases no longer provide a .next() method.
samples, labels = next(iter(train_loader))
plt.figure(figsize=(16,24))

# Undo the Normalize step of the transform so the preview shows natural colours
# instead of values that imshow would clip.
mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
preview = (samples[:24] * std + mean).clamp(0, 1)

grid_imgs = torchvision.utils.make_grid(preview)
np_grid_imgs = grid_imgs.numpy()
# make_grid returns a (channels, height, width) tensor; imshow expects
# (height, width, channels), hence the transpose.
plt.imshow(np.transpose(np_grid_imgs, (1,2,0)))

## **Modelling**

In [None]:
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=3, verbose=False, delta=0, path="models/bestmodel.pt"):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 3
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): File the best model's state_dict is written to.
                            Default: "models/bestmodel.pt"
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):
        """Record one validation loss, checkpointing on improvement.

        Args:
            val_loss (float): Validation loss of the epoch that just finished.
            model (torch.nn.Module): Model whose state_dict is saved on improvement.

        Sets ``self.early_stop`` to True once ``patience`` consecutive calls
        have passed without an improvement.
        """
        # Work with the negated loss so that "higher is better"
        score = -val_loss

        if self.best_score is None:
            # First call: nothing to compare against yet
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            # No (sufficient) improvement
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        # Create the target folder on demand; torch.save fails if it does not exist
        folder = os.path.dirname(self.path)
        if folder:
            os.makedirs(folder, exist_ok=True)
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

### **Transfer Learning: ResNeXt50_32x4d**

In [None]:
# Load a pretrained model
model = models.resnext50_32x4d(pretrained=False)
model.fc # Check resnet's fully connected layer

In [None]:
# Start from pretrained weights. The backbone is frozen below, so with
# pretrained=False it would stay at its random initialisation and the new
# head would be trained on features that carry no information.
model = models.resnext50_32x4d(pretrained=True)

# Freeze parameters so we don't backprop through them
for param in model.parameters():
    param.requires_grad = False

# Get model Output Size = Number of Categories
output_size = len(cat_to_name)

# Input size from current classifier
input_size = model.fc.in_features

# New classification head. Freshly created layers have requires_grad=True,
# so these are the only parameters that get trained.
classifier = nn.Sequential(nn.Linear(input_size, 1024),
                           nn.ReLU(),
                           nn.Dropout(p=0.2),
                           nn.Linear(1024, 256),
                           nn.ReLU(),
                           nn.Dropout(p=0.2),
                           nn.Linear(256, output_size),
                          )

model.fc = classifier
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
print(model)

criterion = nn.CrossEntropyLoss()
# Hand the optimizer only the parameters that are actually trainable
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=1e-4, amsgrad=True)
# Reduce the learning rate by a factor of 0.3 when the monitored loss plateaus
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", patience=5, factor=0.3, verbose=True)
early_stopping = EarlyStopping(patience=7, verbose=True)

# Spread batches across GPUs when more than one is available
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)

### **Training**

In [None]:
# Folder that EarlyStopping.save_checkpoint writes "models/bestmodel.pt" into.
# It has to exist before the first checkpoint is saved.
directory = 'models/'
epochs = 10
train_loss_list, valid_loss_list = [], []
valid_acc_list = []

# Create the checkpoint folder once, before training starts
os.makedirs(directory, exist_ok=True)

for epoch in range(epochs):
    # Training
    model.train()
    train_loss = 0
    train_counter = 0
    # len(train_loader) is the real number of batches in the training split
    for samples, labels in tqdm(train_loader, total=len(train_loader)):
        train_counter += 1
        samples, labels = samples.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(samples)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Mean of the per-batch training losses
    train_loss = train_loss/train_counter
    train_loss_list.append(train_loss)

    # Validation over the whole validation split. A single random batch gives
    # a noisy estimate, which misleads both early stopping and the scheduler.
    model.eval()
    valid_loss, total_label, correct = 0.0, 0, 0
    with torch.no_grad():
        for samples, labels in valid_loader:
            samples, labels = samples.to(device), labels.to(device)
            output = model(samples)
            loss = criterion(output, labels)
            # Weight each batch by its size so the last, smaller batch is not over-counted
            valid_loss += loss.item() * labels.size(0)
            pred = torch.argmax(output, dim=1)
            total_label += labels.size(0)
            correct += pred.eq(labels).sum().item()

    valid_loss = valid_loss / total_label
    # True division keeps the fractional part that the '{:.3f}' format prints
    valid_acc = 100.0 * correct / total_label
    valid_loss_list.append(valid_loss)
    valid_acc_list.append(valid_acc)

    print('[Epoch {}/{}] -> Train Loss: {:.4f} -> Valid Loss: {:.4f}, Valid Accuracy: {:.3f}%'.format(epoch+1, epochs, train_loss, valid_loss, valid_acc))

    # Early Stopping
    early_stopping(valid_loss, model)
    if early_stopping.early_stop:
        # Report the same 1-based epoch number as the progress line above
        print("Early stopping at {} epoch".format(epoch+1))
        break

    scheduler.step(valid_loss)

In [None]:
# Show the class-name -> label-index mapping stored on the training dataset
train_data.class_to_idx

In [None]:
# Overlay the recorded training and validation loss curves on one figure
loss_curves = (
    (train_loss_list, 'Training loss'),
    (valid_loss_list, 'Validation loss'),
)
for history, curve_label in loss_curves:
    plt.plot(history, label=curve_label)
plt.title('Training Loss vs Validation Loss')
plt.legend(frameon=False)
plt.show()

In [None]:
def load_checkpoint(model, file="models/bestmodel.pt", map_location=None):
    """Load a saved state_dict from disk into ``model`` and return the model.

    Args:
        model (torch.nn.Module): Model whose architecture matches the checkpoint.
        file (str): Path of the file written by ``torch.save(model.state_dict(), ...)``.
        map_location: Device the stored tensors are mapped to while loading.
            When None, the device of the model's first parameter is used, so
            a checkpoint saved on a GPU can still be opened on a CPU-only runtime.

    Returns:
        torch.nn.Module: The same ``model`` object, with the loaded weights.
    """
    if map_location is None:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            map_location = first_param.device
    state_dict = torch.load(file, map_location=map_location)
    model.load_state_dict(state_dict)
    return model

# Load the saved checkpoint into the model and make sure it lives on `device`
chkp_model = load_checkpoint(model).to(device)

### **Verifying Validation Accuracy and Loss**

In [None]:
# Re-evaluate the restored checkpoint on the whole validation split
with torch.no_grad():
    valid_loss = 0.0
    chkp_model.eval()
    total_label, correct = 0, 0
    for samples, labels in valid_loader:
        samples, labels = samples.to(device), labels.to(device)
        output = chkp_model(samples)
        loss = criterion(output, labels)
        # Sum of per-sample losses for this batch
        valid_loss += loss.item()*samples.size(0)
        _, predicted = torch.max(output.data, 1)
        total_label += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Divide by the number of samples actually evaluated. valid_loader.dataset is
    # the full image folder, not the validation subset picked by the sampler, so
    # dividing by its length would understate the loss.
    valid_loss = valid_loss/total_label
    # True division keeps the fractional part that the '{:.3f}' format prints
    valid_acc = 100.0 * correct / total_label

print('Valid Loss: {:.4f}, Valid Accuracy: {:.3f}%'.format(valid_loss, valid_acc))

### **View Results**

In [None]:
# Invert the dataset's class-name -> index mapping so a predicted index
# can be translated back into its class name.
class_to_idx = train_data.class_to_idx
idx_to_class = {index: name for name, index in class_to_idx.items()}
idx_to_class

## **Testing**

In [None]:
# Read the checkpoint onto the CPU first so it opens on any runtime,
# including one without the GPU it was saved from.
state_dict = torch.load('models/bestmodel.pt', map_location='cpu')
# A checkpoint saved from an nn.DataParallel wrapper prefixes every key with
# "module."; strip it so the keys match a plain, unwrapped model.
state_dict = {(key[len('module.'):] if key.startswith('module.') else key): value
              for key, value in state_dict.items()}

# No pretrained weights needed here: every weight is overwritten by the checkpoint
model = models.resnext50_32x4d(pretrained=False)

# Rebuild the head with the layout used for training. The number of outputs is
# read from the checkpoint's last Linear layer ('fc.6') instead of being hard-coded.
classifier = nn.Sequential(nn.Linear(model.fc.in_features, 1024),
                           nn.ReLU(),
                           nn.Dropout(p=0.2),
                           nn.Linear(1024, 256),
                           nn.ReLU(),
                           nn.Dropout(p=0.2),
                           nn.Linear(256, state_dict['fc.6.weight'].shape[0]),
                          )

model.fc = classifier
model = model.to(device)
# Inference mode: disables dropout and uses the stored batch-norm statistics
model.eval()
model.load_state_dict(state_dict)

In [None]:
# Fetch one validation batch. The built-in next() is required here: DataLoader
# iterators on current PyTorch releases no longer provide a .next() method.
samples, _ = next(iter(valid_loader))
samples = samples.to(device)
batch = samples[:24]

# Predict without tracking gradients, with dropout disabled
chkp_model.eval()
with torch.no_grad():
    output = chkp_model(batch)
pred = torch.argmax(output, dim=1).tolist()

# Undo the Normalize step so the images are shown in natural colours
mean = torch.tensor([0.485, 0.456, 0.406], device=batch.device).view(1, 3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225], device=batch.device).view(1, 3, 1, 1)
images = (batch * std + mean).clamp(0, 1).cpu().numpy()

fig = plt.figure(figsize=(24, 16))
for num, sample in enumerate(images):
    plt.subplot(4, 6, num + 1)
    # Title each image with the name of its predicted class
    plt.title(cat_to_name[idx_to_class[pred[num]]])
    plt.axis('off')
    # (channels, height, width) -> (height, width, channels) for imshow
    plt.imshow(np.transpose(sample, (1, 2, 0)))
# Adjust spacing only after all subplots exist; earlier there is nothing to lay out
fig.tight_layout()

### **Future Revisions**
*  Try other pretrained models
*  KFold CV
*  Remove the train data modifications
