## Data Preparation

In [2]:
import torch
from torchvision import transforms
from torchvision.transforms import Lambda, Resize
from datasets import load_dataset

class MyDataset(torch.utils.data.Dataset):
    """Map-style dataset over one split of the "Bingsu/Cat_and_Dog" dataset.

    __init__
      Loads the split named by ``dataset`` (this notebook uses 'train' and
      'test') through ``datasets.load_dataset`` and builds the image
      preprocessing pipeline once, so it is not re-created for every sample.

    __getitem__
      Reads row ``idx``, resizes its image to 256 by 256 pixels, converts it
      to RGB when it is in another mode, and turns it into a tensor.
      Returns: data, label  (image tensor, label tensor)

    __len__
      Returns: the number of rows in the loaded split.
    """

    def __init__(self, dataset):
        # Load the requested split of the dataset that needs to be transformed
        self.dataset = load_dataset("Bingsu/Cat_and_Dog", split=f"{dataset}")

        # The pipeline does not depend on the sample, so build it a single
        # time here instead of on every __getitem__ call.
        self.transform = transforms.Compose([
            transforms.Resize((256, 256)),  # Resize to size 256x256
            Lambda(self._to_rgb),           # Convert all images to RGB format
            transforms.ToTensor(),          # Transform image to Tensor object
        ])

    @staticmethod
    def _to_rgb(image):
        """Return ``image`` unchanged when its mode is RGB, else an RGB copy."""
        return image if image.mode == "RGB" else image.convert("RGB")

    def __getitem__(self, idx):
        # Sample row idx from the loaded dataset
        sample = self.dataset[idx]

        # Split up the sample into its image and label fields
        data, label = sample['image'], sample['labels']

        return self.transform(data), torch.tensor(label)

    def __len__(self):
        return len(self.dataset)

In [7]:
# Build the dataset wrappers: the 'train' split is used for training and the
# 'test' split serves as the validation set. Images are transformed lazily,
# one sample at a time, when the datasets are indexed.
train_set, val_set = MyDataset('train'), MyDataset('test')

Found cached dataset parquet (C:/Users/spenc/.cache/huggingface/datasets/Bingsu___parquet/Bingsu--Cat_and_Dog-700815090bea8354/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (C:/Users/spenc/.cache/huggingface/datasets/Bingsu___parquet/Bingsu--Cat_and_Dog-700815090bea8354/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


In [15]:
# Inspect the feature type of the 'image' column in the validation split
val_set.dataset.features['image']

Image(decode=True, id=None)

## Building the Neural Network Layer Sequence

In [5]:
# import the necessary packages
from torch.nn import Module
from torch.nn import Conv2d
from torch.nn import Linear
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import LogSoftmax
from torch import flatten
from torch import nn

In [6]:
class CatSpotter(Module):
    """Small CNN: three CONV => RELU => POOL stages and two linear layers.

    Args:
        numChannels: number of channels of the input images.
        classes: number of output classes.
        inputSize: (height, width) of the input images. It determines the
            number of input features of the first fully connected layer.
            Defaults to (256, 256), which yields 200 * 28 * 28 features.

    Raises:
        ValueError: if ``inputSize`` is too small to survive the three
            convolution/pooling stages.

    ``forward`` returns log-probabilities (LogSoftmax over dim 1).
    """

    def __init__(self, numChannels, classes, inputSize=(256, 256)):
        # call the parent constructor
        super().__init__()
        # initialize first set of CONV => RELU => POOL layers
        self.conv1 = Conv2d(in_channels=numChannels, out_channels=20,
            kernel_size=(5, 5))
        self.relu1 = ReLU()
        self.maxpool1 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # initialize second set of CONV => RELU => POOL layers
        self.conv2 = Conv2d(in_channels=20, out_channels=50,
            kernel_size=(5, 5))
        self.relu2 = ReLU()
        self.maxpool2 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # initialize third set of CONV => RELU => POOL layers
        self.conv3 = Conv2d(in_channels=50, out_channels=200,
            kernel_size=(5, 5))
        self.relu3 = ReLU()
        self.maxpool3 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # initialize first (and only) set of FC => RELU layers; the input
        # width is derived from inputSize instead of being hard-coded
        self.fc1 = Linear(in_features=self._flattenedFeatures(inputSize),
            out_features=500)
        self.relu4 = ReLU()
        # initialize our softmax classifier
        self.fc2 = Linear(in_features=500, out_features=classes)
        self.logSoftmax = LogSoftmax(dim=1)

    @staticmethod
    def _flattenedFeatures(inputSize):
        """Number of values left per sample after the three conv/pool stages."""
        height, width = inputSize
        for _ in range(3):
            # an unpadded 5x5 convolution removes 4 pixels per dimension and
            # the 2x2 max-pool with stride 2 halves the result (rounding down)
            height = (height - 4) // 2
            width = (width - 4) // 2
            if height < 1 or width < 1:
                raise ValueError(
                    "inputSize {} is too small for CatSpotter".format(
                        tuple(inputSize)))
        # 200 is the number of output channels of the last convolution
        return 200 * height * width

    def forward(self, x):
        # pass the input through our first set of CONV => RELU =>
        # POOL layers
        x = self.maxpool1(self.relu1(self.conv1(x)))
        # pass the output from the previous layer through the second
        # set of CONV => RELU => POOL layers
        x = self.maxpool2(self.relu2(self.conv2(x)))
        # pass the output from the previous layer through the third
        # set of CONV => RELU => POOL layers
        x = self.maxpool3(self.relu3(self.conv3(x)))
        # flatten everything but the batch dimension and pass it
        # through our only set of FC => RELU layers
        x = flatten(x, 1)
        x = self.relu4(self.fc1(x))
        # pass the output to our softmax classifier and return the
        # log-probabilities
        return self.logSoftmax(self.fc2(x))

## Initializing The Model

In [9]:
# import the necessary packages
from sklearn.metrics import classification_report
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.cuda.amp import autocast, GradScaler
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import argparse
import time
from tqdm import tqdm
import glob
import os

In [10]:
# training hyperparameters
INIT_LR = 1e-3      # learning rate handed to the optimizer
BATCH_SIZE = 128    # samples per mini-batch
EPOCHS = 100        # full passes over the training set

# train on the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [11]:
# calculate steps (batches) per epoch for the training and validation set.
# The count is rounded up: the data loaders are created without drop_last,
# so the final, smaller batch is also processed and must be counted.
# Rounding down would inflate the averaged losses and give 0 steps (a
# division by zero later on) for a split smaller than one batch.
trainSteps = (len(train_set) + BATCH_SIZE - 1) // BATCH_SIZE
valSteps = (len(val_set) + BATCH_SIZE - 1) // BATCH_SIZE

In [15]:
# build the CatSpotter network (3 input channels, 2 output classes) and move
# it to the selected device
print("[INFO] initializing the CatSpotter model...")
model = CatSpotter(numChannels=3, classes=2)
model = model.to(device)

# optimizer over all model parameters, plus the negative log-likelihood loss
# that matches the LogSoftmax output of the network
opt = Adam(model.parameters(), lr=INIT_LR)
lossFn = nn.NLLLoss()

# per-epoch training history: one list per tracked metric
H = {
    metric: []
    for metric in ("train_loss", "train_acc", "val_loss", "val_acc")
}

# gradient scaler intended for mixed-precision training
scaler = GradScaler()

[INFO] initializing the deCNN model...


## Initializing Model Checkpoints

In [18]:
# Initialize model checkpoint saving location: a directory, relative to the
# working directory, that receives the per-epoch checkpoint files
PATH = 'models/1-23-2024'

In [17]:
# Save model checkpoint
def save_ckp(state, checkpoint_dir, epoch_nr):
    """Serialize ``state`` to ``<checkpoint_dir>/epoch_<epoch_nr>.pt``.

    Args:
        state: object to store with ``torch.save`` (the training loop passes
            a dict with the epoch, model state and optimizer state).
        checkpoint_dir: directory that receives the checkpoint file; it is
            created, including missing parents, when it does not exist yet.
        epoch_nr: number used in the checkpoint file name.
    """
    # torch.save does not create directories, so make sure the target exists
    os.makedirs(checkpoint_dir, exist_ok=True)
    f_path = os.path.join(checkpoint_dir, f'epoch_{epoch_nr}.pt')
    torch.save(state, f_path)

In [19]:
# # Load model from checkpoint
# print("[INFO] Loading the network from checkpoint...")
# files = sorted(glob.glob(f"{PATH}/*"), key=os.path.getctime, reverse=True)
# checkpoint = torch.load(files[0])
# model.load_state_dict(checkpoint['state_dict'])
# opt.load_state_dict(checkpoint['optimizer'])
# epoch = checkpoint['epoch']

## The Model Trainer

In [24]:
# initialize the training and validation data loaders; only the training
# loader reshuffles its samples, the validation loader keeps dataset order
trainDataLoader = torch.utils.data.DataLoader(
    train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    pin_memory=True,
)
valDataLoader = torch.utils.data.DataLoader(
    val_set,
    batch_size=BATCH_SIZE,
    pin_memory=True,
)

In [None]:
# measure how long training is going to take
print("[INFO] training the CatSpotter network...")
startTime = time.time()

# loop over our epochs
for e in range(0, EPOCHS):
    # running sums of the per-batch training and validation loss, kept as
    # plain Python floats
    totalTrainLoss = 0.0
    totalValLoss = 0.0
    # initialize the number of correct predictions in the training
    # and validation step
    trainCorrect = 0
    valCorrect = 0

    # set the model in training mode
    model.train()

    # loop over the training set
    with tqdm(trainDataLoader, unit="batch") as tepoch:
        for x, y in tepoch:

            # send the input to the device
            (x, y) = (x.to(device), y.to(device))

            # zero out the gradients
            opt.zero_grad()

            # full-precision forward pass and loss (no autocast / gradient
            # scaling is applied in this loop)
            output = model(x)
            loss = lossFn(output, y)

            # backpropagate and update the weights
            loss.backward()
            opt.step()

            # add the loss to the total training loss so far and
            # calculate the number of correct predictions; .item() extracts
            # the Python number so no autograd history is accumulated
            totalTrainLoss += loss.item()
            trainCorrect += (output.argmax(1) == y).type(
                torch.float).sum().item()

    # switch off autograd for evaluation
    with torch.no_grad():
        # set the model in evaluation mode
        model.eval()
        # loop over the validation set
        with tqdm(valDataLoader, unit="batch") as vepoch:
            for (x, y) in vepoch:
                # send the input to the device
                (x, y) = (x.to(device), y.to(device))
                # make the predictions and calculate the validation loss
                output = model(x)
                totalValLoss += lossFn(output, y).item()
                # calculate the number of correct predictions
                valCorrect += (output.argmax(1) == y).type(
                    torch.float).sum().item()

    # calculate the average training and validation loss
    avgTrainLoss = totalTrainLoss / trainSteps
    avgValLoss = totalValLoss / valSteps
    # calculate the training and validation accuracy (the counters are
    # overwritten with the fraction of correctly classified samples)
    trainCorrect = trainCorrect / len(train_set)
    valCorrect = valCorrect / len(val_set)
    # update our training history (plain floats)
    H["train_loss"].append(avgTrainLoss)
    H["train_acc"].append(trainCorrect)
    H["val_loss"].append(avgValLoss)
    H["val_acc"].append(valCorrect)
    # print the model training and validation information
    print("[INFO] EPOCH: {}/{}".format(e + 1, EPOCHS))
    print("Train loss: {:.6f}, Train accuracy: {:.4f}".format(
        avgTrainLoss, trainCorrect))
    print("Val loss: {:.6f}, Val accuracy: {:.4f}\n".format(
        avgValLoss, valCorrect))

    # Save the epoch model parameters
    checkpoint = {
        'epoch': e + 1,
        'state_dict': model.state_dict(),
        'optimizer': opt.state_dict()
    }

    # NOTE: the file name uses the zero-based loop index e, while the stored
    # 'epoch' entry is one-based (e + 1)
    save_ckp(checkpoint, PATH , e)

# finish measuring how long training took
endTime = time.time()
print("[INFO] total time taken to train the model: {:.2f}s".format(
    endTime - startTime))

[INFO] training the CatSpotter network...


 38%|███▊      | 24/63 [03:49<05:52,  9.04s/batch]

## Model Evaluation

In [None]:
# we can now evaluate the network on the validation set
print("[INFO] evaluating network...")
# turn off autograd for evaluation
with torch.no_grad():
    # set the model in evaluation mode
    model.eval()

    # predicted and true class indices, collected batch by batch
    preds = []
    targets = []
    # the loader yields (image batch, label batch) tuples, matching what
    # MyDataset.__getitem__ returns
    for x, y in valDataLoader:
        # make the predictions and add them to the list
        pred = model(x.to(device))
        preds.extend(pred.argmax(axis=1).cpu().numpy())
        # keep the labels of the same batch so both lists stay aligned
        targets.extend(y.cpu().numpy())


# generate a classification report (true labels first, predictions second)
print(classification_report(np.array(targets), np.array(preds)))