# Cat-Dog Classifier

# Importing Dependencies

In [194]:
import torch
import numpy as np
import os
import os    
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import torch.nn as nn # For making NN layers
import torch.nn.functional as F # For activation functions
import torch.optim as optim # For optimizer
import matplotlib.pyplot as plt

import pandas as pd # To creaste cvs submission file

import torchvision # library for handling images
from torchvision import datasets, transforms, models # for making datasets, preprocessing data, pretrained models
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image

import torchvision.models as models # to get pretrained models

In [46]:
# Setting torch variables to GPU else cpu for training
device = 'cpu'
if torch.cuda.is_available():
    torch.device('cuda')
    device = 'cuda'
    print('Training on GPU')
else:
    torch.device('cpu')
    print('Training on CPU')
    
# Sets batchsize of training images
batch_size = 128

Training on GPU


# Creating Custom Datasets Class

In [182]:

class DatasetProcessing(Dataset):
    '''Dataset is created via reading image in image directory. Images titles are labelled with an id e.g cat.2022.jpg'''
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = annotations_file
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        
        # the image name for reading the image at idx
        img_name = 'cat'+'.'+str(idx)+'.jpg'
        if idx>=12500:
            img_name = 'dog'+'.'+str(idx%12500)+'.jpg'
        
        # directory of image at idx
        img_path = os.path.join(self.img_dir,img_name)
        # load image at idx and normalise
        image = read_image(img_path)/255
        # label of image at idx
        label = self.img_labels[idx]
        # transform image and its label
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        
        # returns image and label tuple
        return image, label
    
class TestDatasetProcessing(Dataset):
    '''The test data is created by reading test images from their directory.'''
    def __init__(self,img_dir, transform=None):
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return 12500

    def __getitem__(self, idx):
        
        # the image name for reading the image at idx
        img_name = str(idx+1)+'.jpg'
        # directory of image at idx
        img_path = os.path.join(self.img_dir,img_name)
        # load image at idx and normalise
        image = read_image(img_path)/255
       
        # transform image and its label
        if self.transform:
            image = self.transform(image)
        # returns image and label tuple
        return image

# The Labels

In [48]:
# Cats are 0 for first 12500
labels = np.zeros((25000))
# Dogs are the last 12500
labels[12500:]=1

# Creating Training Dataset and DataLoader

In [49]:
transform = transforms.Compose([
    transforms.Resize(size=(224,224)),
    ])
target_transform = transforms.Compose([
    transforms.ToTensor(),
    ])

# Datasets and DatLoader.
# ! Change the image folder path !
train_data =  DatasetProcessing(labels,'dataset/train', transform = transform, target_transform=None)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

# Helper Func.

In [209]:
def get_accuracy(predictions, labels):
    '''
    Calculates the predictions of the NN by counting 
    number of prediction correct over total predictions
    '''
    return predictions.argmax(dim=1).eq(labels).sum().item()

def train(model, device, dataloader, optimizer, criterion,epoch):
    '''The training loop used to train the model'''
    # Records the loss and accuracy for every epoch (one passthrough of dataloader)
    epoch_loss = 0
    epoch_acc = 0
    model.train()
    
    for batch_idx, (images,labels) in enumerate(dataloader):
        
        images = images.to(device)
        labels = labels.type(torch.LongTensor)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        # outputs a 1x2 tensor of 1dim tensor scores
        pred = model(images) 
        
        # Cross Entropy loss between predicted and actual
        loss = criterion(pred,labels)
        
        # Zero all gradients in Computational Graph
        optimizer.zero_grad()
        
        # Perform Back Propagation
        loss.backward()
        
        # Perform gradient descent
        optimizer.step()
        
        # Count correct predictions
        with torch.no_grad():
            acc = get_accuracy(pred,labels)
        
        # Accumulating training loss and accuracy throughout one epoch
        epoch_loss += loss.item()
        epoch_acc += acc
        
        # Print metrics per interval
        if batch_idx%25==0:
            print(f'| Epoch: {epoch+1} | Acc: {100*acc/len(labels):5.2f}% | Loss: {epoch_loss/(batch_idx+1):.3f}|')
    
    # Returns metric
    return epoch_loss / 25000, epoch_acc / 25000

# The Model

Using ResNet18 as a feature extractor. WIll remove last 1000 class classifier with only a binary classifier for cats and dogs. Then will fine-tune on training set.

In [210]:
# Resnet18 on ImageNet predicts 1000 classes
model = models.resnet18(pretrained=True).to(device)

In [211]:
# Freezing all features of the ResNet18 feature extractor
for param in model.parameters():
    param.requires_grad = False

In [212]:
# Creating new cat-dog 2 nuerons F.C linear layer binary classifier
model.fc = nn.Linear(in_features=512, out_features=2).to(device)

In [213]:
# Criterion
criterion = nn.CrossEntropyLoss()

In [214]:
# Optimizer 
lr = 0.001
optimizer = optim.Adam(model.parameters(), lr = lr)

# Training Model

In [215]:
# One epoch is enough for high 90s percent accuracy and generalisation.
EPOCHS = 1 

# Model is saved during training based on highest training accuracy
SAVE_DIR = 'models'
MODEL_SAVE_PATH = os.path.join(SAVE_DIR, 'resnet18-cat-dog-calssifier-1-epoch-v3.pt')

best_train_acc = 0

# Training loss and acc logger to plot performance
train_loss_logger = []
train_acc_logger = []

# Checks if previous models have been saved in folder, else make one.
if not os.path.isdir(f'{SAVE_DIR}'):
    os.makedirs(f'{SAVE_DIR}')

# Finally, the training begins...
for epoch in range(EPOCHS):
    train_loss, train_acc = train(model, device, train_loader, optimizer, criterion, epoch)
    
    # Recording loss and acc per epoch
    train_loss_logger.append(train_loss)
    train_acc_logger.append(train_acc)
    
    # Save model with highest accuracy
    if train_acc > best_train_acc:
        best_train_acc = train_acc
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
    
    print(f'| EPOCH: {epoch+1} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:5.2f}% |')

Pred: torch.Size([128, 3, 224, 224])
| Epoch: 1 | Acc: 37.50% | Loss: 0.850|
Pred: torch.Size([128, 3, 224, 224])
| Epoch: 1 | Acc: 96.88% | Loss: 0.220|
| EPOCH: 1 | Train Loss: 0.001 | Train Acc: 94.12% |


In [246]:
best_train_acc

0.9412

In [248]:
np.round(train_loss_logger,5), train_acc_logger

(array([0.00125]), [0.9412])

## Just some test code to check validation of model output

In [243]:
# Test image file path
img_dir = 'dataset/test/12228.jpg'
# Reading image as tensor
cat1 = read_image(img_dir)
cat1.shape

In [244]:
# Creating a transform function to resize image to 224,224 thats required for ResNet18
transform_resize = torchvision.transforms.Resize((224,224))

# Normalisation of image as thats what is required...
cat1 = transform_resize(cat1)/255

# Converting image to FloatTensor as required by ResNet18
cat1 = cat1.type(torch.FloatTensor).unsqueeze(0)

# Checking input image shape is 3 X 224 X 224, and type tensor as required for ResNet18
cat1.shape

torch.Size([3, 224, 224])

In [245]:
# Set model to eval mode because we dont wont 
# drop out layers etc to happen during inference
model.eval()

# Inputing the image and getting back logit prediction as numpy, 
# now in cpu not gpu so we can use np.argmax() on it....
score = model(cat1.to(device)).detach().cpu().numpy()

# FINALLY, the class prediction...
p = 'Cat' if np.argmax(score[0])==0 else 'Dog'
p

'Dog'

# Prediction on Test Set

In [183]:
# A list to hold all the predictions
pred = []

# Creating the test dataset for DataLoader
test_dataset = TestDatasetProcessing('dataset/test',transform=transform)

# Creating the DataLoader from dataset so NN can iterate over a batch... 
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [184]:
# Finally, the test inference.
# Making sure model in evlaution mode 
with torch.no_grad():
    model.eval()
    
    # Iterate over batch image in data loader, perform inference per batch
    for batch, image in enumerate(test_loader):
        # Make logit scores
        score = model(image.to(device)).detach().cpu().numpy()
        
        # Make predictions based off logits
        pred_class = 0 if np.argmax(score[0])==0 else 1
        
        # Append predictions to pred list for csv submission
        pred.append(pred_class)
        

# Writing predictions to CSV file

In [195]:
# Creating a pandas dataframe with id and label header.
# Will convert this to csv.
output = pd.DataFrame({'id':np.arange(1,12501), 'label':pred})

In [196]:
# Checking the that id and predictions are in correct dtypes.
# e.g labels are in integers not float, and id starts at 1 not 0
output.head()

Unnamed: 0,id,label
0,1,1
1,2,1
2,3,1
3,4,1
4,5,0


In [249]:
# Convert pandas dataframe to csv
output.to_csv(f'submission.csv', index=False)

# Print something motivating, as all the work is finally done!
print('CONGRATULATIONS! Cat-Dog predictions saved! :)')

Cat-Dog predictions saved! :)
