In [13]:
# <font style="color:blue">Project 1 - Part 1: Data Understanding & Pipeline Check</font>

Here, we will slightly modify the steps we had used to train Neural Networks:

- Step 1 - Understand your problem
- Step 2A - Get the data
- Step 2B - Explore & Understand your data
- Step 2C - Create a sample data from the dataset
- Step 3 - Data Preparation
- Step 4 - Train a simple model on sample data, and check the pipeline before training the full network
- Step 5 - Train on full Data
- Step 6 - Improve your model

In this notebook we will detail the steps 1 to 4, and do some coding along the way! You will implement Steps 5 & 6 in the next notebook.

This notebook contains 30 points. <font style="color:red">The sections in red are the ones that carry marks.</font>

SyntaxError: invalid decimal literal (4100504795.py, line 6)

#### Points Distribution - Maximum Points: 30


<div align="center">
    <table>
        <tr><td><h3>Number</h3></td> <td><h3>Section</h3></td> <td><h3>Points</h3></td> </tr>
        <tr><td><h3>1</h3></td> <td><h3>Explore the Data</h3></td> <td><h3>5</h3></td> </tr>
        <tr><td><h3>2</h3></td> <td><h3>Data Preparation</h3></td> <td><h3>5</h3></td> </tr>
        <tr><td><h3>3</h3></td> <td><h3>Configurations</h3></td><td><h3>5</h3></td> </tr>
        <tr><td><h3>4</h3></td> <td><h3>Display Mistakes</h3></td> <td><h3>15</h3></td> </tr>
    </table>
</div>


# <font style="color:blue">Step 1: Understand Your Problem </font><a name="step1"></a>

As you already know, Image Classification helps classify an image based on its visual content. So, the model is supposed to look at the given image and predict which object is present in it. Obviously, the number of objects which it can predict depends on how many you trained it on.

In our problem, we want to classify an input image between **3 animals** - **_cat, dog and panda_**.

### <font style="color:green">What Do We Need and How to Achieve It? </font>

1. You need correctly-labeled images of each animal.
2. Also, you need to train a network to understand the input image.


## <font style="color:blue">Step 2A: Get the Data </font>

Let’s use a dataset from Kaggle. <a target="_blank" href="https://www.kaggle.com/competitions/open-cv-py-torch-project-1-classification-round-2/data">Go to the Data Tab</a>.

We have already separated the dataset into training, validation and test splits for you.

In [None]:
import os
import matplotlib.pyplot as plt


# Text formatting
bold = "\033[1m"
end = "\033[0m"

plt.style.use('ggplot')
block_plot=False

%matplotlib inline
#print("Raj")

# <font style="color:blue">Step 2B: Explore the Data [5 Points]</font><a name="step2b"></a>


In [14]:
#root_dir = r"/kaggle/input/opencv-pytorch-classification-project-1/dataset"

root_dir = r"/kaggle/input/open-cv-py-torch-project-1-classification-round-2/dataset"

train_dir = os.path.join(root_dir, "Train")
valid_dir = os.path.join(root_dir, "Valid")

print(root_dir)
print(train_dir)
print(valid_dir)

/kaggle/input/open-cv-py-torch-project-1-classification-round-2/dataset
/kaggle/input/open-cv-py-torch-project-1-classification-round-2/dataset/Train
/kaggle/input/open-cv-py-torch-project-1-classification-round-2/dataset/Valid


### <font style="color:green">Folder Structure </font>


In [15]:
def list_folders(startpath):
    """Print the folder tree rooted at ``startpath``, one folder name per line.

    Every name is indented by four spaces per level below ``startpath`` and
    left-justified in a field that is at least eight characters wide.
    Files are walked but never printed.
    """
    for current_dir, _subdirs, _filenames in os.walk(startpath):
        # Depth is the number of path separators left after dropping the start prefix.
        relative_part = current_dir.replace(startpath, "")
        depth = relative_part.count(os.sep)
        folder_name = os.path.basename(current_dir)
        print(" " * (4 * depth) + f"{folder_name:<8}")


list_folders(root_dir)

dataset 
    Valid   
        dog     
        panda   
        cat     
    Test    
    Train   
        dog     
        panda   
        cat     


### <font style="color:green">Import the Datasets Module</font>

Load the data utilizing the `datasets` module.

In [18]:
from torchvision import datasets

AttributeError: module 'torch' has no attribute 'fx'

### <font style="color:green">Create a Dataset Object </font>


In [None]:
train_data = datasets.ImageFolder(train_dir)
validation_data = datasets.ImageFolder(valid_dir)

### <font style="color:green">List the Classes</font>

It simply prints the sub folders present under the training or validation folder.


In [None]:
print(train_data.classes)
print(validation_data.classes)

### <font style="color:green">Get the Class ID to Name Mapping</font>

In [None]:
print(train_data.class_to_idx)
print(validation_data.class_to_idx)

### <font style="color:red">Find the Number of Samples in Training and Validation Folders [2 Points]</font>


In [None]:
# Enter Code Here

# Size of each split, taken from the length of the corresponding ImageFolder dataset.
num_train_samples = len(train_data)
num_valid_samples = len(validation_data)

print(f"{bold}Number of training samples:{end}", num_train_samples)
print(f"{bold}Number of validation samples:{end}", num_valid_samples)

### <font style="color:red">Display Some Samples [3 Points]</font>

Display one sample from each class. We know that the train dataset contains

- cats from 0 to 699
- dogs from 700 to 1399
- pandas from 1400 to 2099

Take one sample from each class and display using matplotlib


In [None]:
# Enter Code Here

# Index 0 lies inside the cat range (0-699) of the training set.
img, target = train_data[0]

print(f"image size: {img.size}, target: {target}")

plt.imshow(img)
plt.title("Cat")
plt.axis('off')
plt.show()

In [None]:
# Enter Code Here

# Index 700 is the first dog sample (dogs occupy indices 700-1399).
img, target = train_data[700]

print("image size: {}, target: {}".format(img.size, target))

# Title the figure and hide the axes so it matches the cat sample above it.
plt.imshow(img)
plt.axis('off')
plt.title("Dog")
plt.show()

In [None]:
# Enter Code Here

# Index 1400 is the first panda sample (pandas occupy indices 1400-2099).
img, target = train_data[1400]

print("image size: {}, target: {}".format(img.size, target))

# Title the figure and hide the axes so it matches the cat sample.
plt.imshow(img)
plt.axis('off')
plt.title("Panda")
plt.show()

**Note:** The images are all of different size.


# <font style="color:blue">Step 2C: Create Sample Data</font><a name="step2c"></a>

Take `5%` images from training and validation to create a small sample dataset, which will check our training pipeline.


In [None]:
import torch
import numpy as np

In [None]:
subset_size = 0.05

### <font style="color:green">Create a Subset From the Original Data</font>

Instead of copying the original data, use the `Subset` method in `torch` to create a subset of the data. And use this to train the model.

This is new. Let’s see how it is done.

In [None]:
# Keep every (1 / subset_size)-th sample. np.arange with a float step returns float
# values, which cannot be used to index the underlying dataset, so the indices are
# cast to int (the same cast subset_data_loader applies).
train_subset = torch.utils.data.Subset(
    train_data,
    np.arange(0, len(train_data), 1.0 / subset_size).astype(int),
)

validation_subset = torch.utils.data.Subset(
    validation_data,
    np.arange(0, len(validation_data), 1.0 / subset_size).astype(int),
)

In [None]:
# Both subset loaders share the same settings: small batches, one worker, fixed order.
subset_loader_kwargs = dict(batch_size=8, num_workers=1, shuffle=False)

train_subset_loader = torch.utils.data.DataLoader(train_subset, **subset_loader_kwargs)
validation_subset_loader = torch.utils.data.DataLoader(validation_subset, **subset_loader_kwargs)

In [None]:
# The loader's .dataset attribute is the Subset it wraps, so len() gives the subset size.
print(f"Train Subset Size: {len(train_subset_loader.dataset)}")
print(f"Validation Subset Size: {len(validation_subset_loader.dataset)}")

You can see that the subset data is only 5% of the original training and validation data.

We will create a separate function called `subset_data_loader` to create data loaders for subsets.


# <font style="color:blue">Step 3. Data Preparation [5 Points]</font> <a name="step3"></a>

Now that you have seen how the data is organized, configure the train and valid loaders to feed the training pipeline.


## <font style="color:blue">3.1. Import Libraries </font>


In [None]:
import time
from dataclasses import dataclass
from typing import List, Union, Tuple


from tqdm import tqdm

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.tensorboard import SummaryWriter

from torchvision import datasets, transforms

from torchmetrics import MeanMetric
from torchmetrics.classification import MulticlassAccuracy

## <font style="color:blue">3.2. Image Transforms</font>

We need to normalize the data. Some of the standard ways of doing this are to subtract the mean, to make all images equal in size, or to rescale the range to `[0., 1.]`. Use the following functions to do this:


### <font style="color:green">3.2.1. Compulsory Preprocessing Transforms</font>


In [None]:
def image_preprocess_transforms(img_size):
    """Return the mandatory preprocessing pipeline: resize, then convert to a tensor.

    Args:
        img_size: size argument handed to ``transforms.Resize``.

    Returns:
        A ``transforms.Compose`` applying ``Resize(img_size)`` followed by ``ToTensor()``.
    """
    steps = [
        transforms.Resize(img_size),
        transforms.ToTensor(),
    ]
    return transforms.Compose(steps)

### <font style="color:green">3.2.2. Common Image Transforms</font>

Normalize using mean and std. You can add any other transforms here as per your dataset.


In [None]:
def image_common_transforms(img_size=(224, 224), mean=(0.4611, 0.4359, 0.3905), std=(0.2193, 0.2150, 0.2109)):
    """Return the shared pipeline: mandatory preprocessing followed by normalization.

    Args:
        img_size: size forwarded to ``image_preprocess_transforms``.
        mean: per-channel mean passed to ``transforms.Normalize``.
        std: per-channel standard deviation passed to ``transforms.Normalize``.

    Returns:
        A ``transforms.Compose`` of the preprocessing pipeline and ``Normalize(mean, std)``.
    """
    return transforms.Compose(
        [
            image_preprocess_transforms(img_size),
            transforms.Normalize(mean, std),
        ]
    )

### <font style="color:green">3.2.3. Mean and STD</font>

Function for calculating the per-channel mean and standard deviation

In [None]:
def get_mean_std(data_root, img_size=(224, 224), num_workers=4):
    """Compute the per-channel mean and standard deviation of an image folder.

    Images are loaded through ``data_loader`` using the mandatory preprocessing
    transforms (resize + tensor conversion), then statistics are accumulated
    over all batches.

    Args:
        data_root: root directory passed to ``data_loader``.
        img_size: size every image is resized to before the statistics are taken.
        num_workers: number of loader worker processes.

    Returns:
        Tuple ``(mean, std)`` of tensors with three entries each
        (the accumulators are created with ``torch.zeros(3)``).
    """
    transform = image_preprocess_transforms(img_size=img_size)

    # Forward num_workers to the loader; it used to be accepted but silently ignored.
    loader = data_loader(data_root, transform, num_workers=num_workers)

    channel_sum = torch.zeros(3)
    channel_sqrd_sum = torch.zeros(3)
    num_samples = 0

    for batch_data, _ in loader:
        # Weight each batch by its sample count so that a smaller final batch
        # does not get the same influence as a full one.
        batch_count = batch_data.shape[0]
        channel_sum += batch_data.mean(dim=(0, 2, 3)) * batch_count  # sum of per-image E[x]
        channel_sqrd_sum += (batch_data**2).mean(dim=(0, 2, 3)) * batch_count  # sum of per-image E[x**2]
        num_samples += batch_count

    # E[X] over the whole dataset.
    mean = channel_sum / num_samples

    # var[X] = E[X**2] - E[X]**2
    var = (channel_sqrd_sum / num_samples) - (mean**2)

    # Floating-point round-off can push a tiny variance below zero; clamp before the sqrt.
    std = var.clamp(min=0.0) ** 0.5
    print("mean: {}, std: {}".format(mean, std))

    return mean, std

## <font style="color:blue">3.3. Data Loaders </font>


### <font style="color:green">3.3.1. Data Loader for Full Data</font>

Data loader used by the training routine to generate batches of data:


In [None]:
def data_loader(data_root, transform, batch_size=16, shuffle=False, num_workers=2):
    """Create a ``DataLoader`` over every image found under ``data_root``.

    Args:
        data_root: directory handed to ``datasets.ImageFolder`` as ``root``.
        transform: transform applied by the ``ImageFolder`` dataset.
        batch_size: number of samples per batch.
        shuffle: whether the loader shuffles the samples.
        num_workers: number of loader worker processes.

    Returns:
        A ``torch.utils.data.DataLoader`` wrapping the full ``ImageFolder`` dataset.
    """
    image_folder = datasets.ImageFolder(root=data_root, transform=transform)

    return torch.utils.data.DataLoader(
        image_folder,
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=shuffle,
    )

### <font style="color:green">3.3.2. Data Loader for Subset</font>

Data loader which uses the `Subset` to generate batches


In [None]:
def subset_data_loader(data_root, transform, batch_size=8, shuffle=False, num_workers=2, subset_size=0.05):
    """Create a ``DataLoader`` over an evenly strided subset of an image folder.

    Args:
        data_root: directory handed to ``datasets.ImageFolder`` as ``root``.
        transform: transform applied by the ``ImageFolder`` dataset.
        batch_size: number of samples per batch.
        shuffle: whether the loader shuffles the subset.
        num_workers: number of loader worker processes.
        subset_size: fraction of the dataset to keep; indices are spaced
            ``1 / subset_size`` apart.

    Returns:
        A ``torch.utils.data.DataLoader`` wrapping a ``Subset`` of the dataset.
    """
    full_dataset = datasets.ImageFolder(root=data_root, transform=transform)

    # Evenly spaced positions; the float stride is truncated to integer indices.
    stride = 1.0 / subset_size
    picked_indices = np.arange(0, len(full_dataset), stride).astype(int)

    return torch.utils.data.DataLoader(
        torch.utils.data.Subset(full_dataset, picked_indices),
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=shuffle,
    )

## <font style="color:red">3.4. Prepare Data [5 Points]</font>

The main function which uses all the above functions to generate the train and valid dataloaders.


### <font style="color:red">Question </font>[5 Points]

There are two mistakes in the below code snippet. Spot the errors and correct them all. (Note: The rest of the code will run, even if you fail to find the mistakes.)


In [None]:
def get_data(batch_size, data_root, img_size=(224, 224), num_workers=4, data_augmentation=False):
    """Build the training and validation data loaders.

    The normalization statistics are computed from the ``Train`` folder and
    used for both loaders. Training batches come from ``subset_data_loader``
    (a strided subset); validation batches come from ``data_loader`` (all
    validation images).

    Args:
        batch_size: batch size for both loaders.
        data_root: directory containing the ``Train`` and ``Valid`` folders.
        img_size: size every image is resized to.
        num_workers: number of loader worker processes.
        data_augmentation: when true, the training loader uses
            ``data_augmentation_preprocess``; otherwise it uses the common transforms.

    Returns:
        Tuple ``(train_loader, valid_loader)``.
    """
    train_data_path = os.path.join(data_root, "Train")
    valid_data_path = os.path.join(data_root, "Valid")

    mean, std = get_mean_std(data_root=train_data_path, img_size=img_size, num_workers=num_workers)

    common_transforms = image_common_transforms(img_size, mean, std)

    if data_augmentation:
        # Forward img_size so augmented training images match the requested size
        # instead of silently falling back to the helper's default.
        train_transforms = data_augmentation_preprocess(mean, std, img_size=img_size)
    else:
        train_transforms = common_transforms

    # Train dataloader (strided subset, shuffled).
    train_loader = subset_data_loader(
        train_data_path,
        train_transforms,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )

    # Valid dataloader: always the non-augmented common transforms, fixed order.
    valid_loader = data_loader(
        valid_data_path,
        common_transforms,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
    )

    return train_loader, valid_loader

# <font style="color:blue">Step 4: Train a Simple Model</font><a name="step4"></a>

Let's dive into creating the training pipeline and training a simple model on our sample data. Don't worry, we've got most of the code covered for you in this section! In the next notebook, it'll be your turn to take the reins and make all the necessary tweaks and changes. It's going to be an exciting learning journey!

## <font style="color:red">4.1. Configurations [ 5 Points]</font>

In this section, define the training and system configurations. 

### <font style="color:red">Question </font>[5 Points]:

Set up the training pipeline with a batch size of `4`, then run the experiment for `100` epochs. Change the configurations as given below:


### <font style="color:green">4.1.1. System Configuration</font>


In [None]:
from dataclasses import dataclass

@dataclass
class SystemConfig:
    """System-level settings applied before training: the RNG seed and the cuDNN flags."""

    # Seed handed to the random number generators.
    seed: int = 21
    # Value for the cuDNN benchmark flag (performance-oriented).
    cudnn_benchmark_enabled: bool = True
    # Value for the cuDNN deterministic flag (reproducibility-oriented).
    cudnn_deterministic: bool = True

In [None]:
def system_setup(config: "SystemConfig"):
    """Seed the random number generators, apply the cuDNN flags and pick a device.

    Args:
        config: object providing ``seed``, ``cudnn_benchmark_enabled`` and
            ``cudnn_deterministic``.

    Returns:
        ``torch.device("cuda")`` when CUDA is available, otherwise ``torch.device("cpu")``.
    """
    # Imported locally: none of the visible notebook cells import `random`,
    # so the call below would otherwise raise NameError.
    import random

    random.seed(config.seed)
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = config.cudnn_benchmark_enabled
    torch.backends.cudnn.deterministic = config.cudnn_deterministic

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return device

### <font style="color:green">4.1.2. Training Configuration</font>


In [None]:
# NOTE(review): this cell uses TrainingConfig and MyModel, which are only defined in
# later cells of this notebook. On a fresh kernel ("Restart & Run All") it fails with
# NameError unless those cells have been executed first.
config = TrainingConfig()  # Create the config object

# Build the model from the config values and place it on the configured device.
model = MyModel(num_classes=config.num_classes, dropout_prob=config.dropout_prob).to(config.device)

# Adam optimizer over the model parameters, with the configured learning rate and weight decay.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=config.init_learning_rate,
    weight_decay=config.weight_decay
)

# Step scheduler attached to the optimizer above, using the configured step size and gamma.
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=config.scheduler_step_size,
    gamma=config.scheduler_gamma
)


In [None]:
from dataclasses import dataclass
from typing import Tuple
import os
import torch

@dataclass
class TrainingConfig:
    """Hyperparameters, data-loading options and output locations for a training run."""

    # ---- Core training settings ----
    # Number of target classes.
    num_classes: int = 3
    # Samples per mini-batch.
    batch_size: int = 8
    # Image size used by the transforms.
    img_size: Tuple[int, int] = (224, 224)
    # Number of training epochs.
    epochs_count: int = 50
    # Initial learning rate for the optimizer.
    init_learning_rate: float = 0.001
    # Weight-decay coefficient (regularization).
    weight_decay: float = 1e-4
    # Dropout probability intended for the model.
    dropout_prob: float = 0.5

    # ---- Dataset location and loader settings ----
    data_root: str = "/kaggle/input/open-cv-py-torch-project-1-classification-round-2/dataset"
    num_workers: int = 2
    # Resolved once, when the class is defined: "cuda" if available, else "cpu".
    device: str = "cuda" if torch.cuda.is_available() else "cpu"

    # ---- Per-channel normalization statistics ----
    mean: Tuple[float, float, float] = (0.4573, 0.4348, 0.3884)
    std: Tuple[float, float, float] = (0.2686, 0.2601, 0.2600)

    # ---- Logging and checkpointing ----
    save_model_name: str = "cat_dog_panda_classifier.pt"
    root_log_dir: str = os.path.join("Logs_Checkpoints", "Model_logs")
    root_checkpoint_dir: str = os.path.join("Logs_Checkpoints", "Model_checkpoints")
    log_dir: str = "version_0"
    checkpoint_dir: str = "version_0"

    # ---- Learning-rate scheduler ----
    # Number of epochs between learning-rate decays.
    scheduler_step_size: int = 15
    # Multiplicative factor applied to the learning rate at each decay.
    scheduler_gamma: float = 0.5


In [None]:
def data_augmentation_preprocess(mean, std, img_size=(224, 224)):
    """Return the training-time augmentation pipeline.

    The pipeline resizes, applies random flip / rotation / colour jitter,
    converts to a tensor, normalizes, and finally applies a random resized crop
    and a random affine transform.

    Args:
        mean: per-channel mean passed to ``transforms.Normalize``.
        std: per-channel standard deviation passed to ``transforms.Normalize``.
        img_size: target size, used both for the initial resize and for the
            output of the random resized crop.

    Returns:
        A ``transforms.Compose`` with the augmentation steps.
    """
    return transforms.Compose([
        transforms.Resize(img_size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
        # Crop back to img_size rather than a hard-coded 224, so the augmented
        # images keep the requested size for any img_size.
        transforms.RandomResizedCrop(img_size, scale=(0.8, 1.0)),
        transforms.RandomAffine(degrees=15, translate=(0.1, 0.1)),
    ])


In [None]:
def get_resnet18_model(num_classes):
    """Build a ResNet-18 whose final fully connected layer outputs ``num_classes`` values.

    Args:
        num_classes: number of output features of the replaced ``fc`` layer.

    Returns:
        The ResNet-18 module, created with ``weights=None``.
    """
    # Imported here because the notebook only runs `from torchvision import models`
    # in a later cell; without this the function fails if called before that cell.
    from torchvision import models

    model = models.resnet18(weights=None)  # <-- offline mode
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model



In [None]:
training_config = TrainingConfig()

from torchvision import models

def get_resnet18_model(num_classes):
    """Return a ResNet-18 (created with ``weights=None``) with a ``num_classes``-way final layer.

    NOTE(review): an identical function is defined in the previous cell; this
    definition replaces it when the cell runs.
    """
    backbone = models.resnet18(weights=None)
    fc_inputs = backbone.fc.in_features
    backbone.fc = nn.Linear(fc_inputs, num_classes)
    return backbone

model = get_resnet18_model(training_config.num_classes).to(training_config.device)

#model = get_resnet18_model(config.num_classes).to(config.device)

### <font style="color:green">4.1.3. System Setup</font>


In [None]:
def setup_system(system_config: "SystemConfig") -> None:
    """Seed PyTorch and, when CUDA is available, apply the cuDNN flags.

    Args:
        system_config: object providing ``seed``, ``cudnn_benchmark_enabled``
            and ``cudnn_deterministic``.
    """
    torch.manual_seed(system_config.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(system_config.seed)
        # The benchmark switch is `torch.backends.cudnn.benchmark`. Assigning to
        # `torch.backends.cudnn_benchmark_enabled` only creates an unused attribute
        # and leaves the real setting untouched.
        torch.backends.cudnn.benchmark = system_config.cudnn_benchmark_enabled
        torch.backends.cudnn.deterministic = system_config.cudnn_deterministic

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchmetrics.classification import MulticlassAccuracy
from torchmetrics import MeanMetric
from tqdm import tqdm

In [None]:
def train(
    train_config: TrainingConfig,
    model: nn.Module,
    optimizer: torch.optim.Optimizer,
    train_loader: torch.utils.data.DataLoader,
    epoch_idx: int,
    total_epochs: int,
) -> Tuple[float, float]:
    """Run one training epoch and return its loss and accuracy.

    Args:
        train_config: supplies ``num_classes`` and ``device``.
        model: network to optimize; it is switched to training mode.
        optimizer: optimizer stepped once per batch.
        train_loader: iterable yielding ``(data, target)`` batches.
        epoch_idx: epoch number shown in the progress bar.
        total_epochs: total number of epochs shown in the progress bar.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)`` as computed by the ``MeanMetric`` and
        ``MulticlassAccuracy`` trackers over the whole epoch.
    """
    model.train()

    target_device = train_config.device

    # Running trackers: sample-weighted mean loss and micro-averaged accuracy.
    loss_tracker = MeanMetric()
    accuracy_tracker = MulticlassAccuracy(num_classes=train_config.num_classes, average="micro")

    header = f"Train:\t{bold}Epoch: {epoch_idx}/{total_epochs}{end}"
    prog_bar = tqdm(train_loader, bar_format="{l_bar}{bar:10}{r_bar}{bar:-10b}")
    prog_bar.set_description(header)

    for inputs, labels in prog_bar:
        inputs = inputs.to(target_device)
        labels = labels.to(target_device)

        # Standard step: clear gradients, forward, loss, backward, parameter update.
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = F.cross_entropy(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size.
        loss_tracker(batch_loss.item(), weight=inputs.shape[0])

        # argmax over the logits gives the predicted class; metrics are updated on CPU.
        predicted = logits.detach().argmax(dim=1)
        accuracy_tracker(predicted.cpu(), labels.cpu())

        prog_bar.set_description(
            header + f" Train Loss: {loss_tracker.compute():.4f}, Train Acc: {accuracy_tracker.compute():.4f}"
        )

    epoch_loss = loss_tracker.compute()
    epoch_acc = accuracy_tracker.compute()

    prog_bar.close()

    return epoch_loss, epoch_acc

## <font style="color:blue">4.3. Validation Function</font>

In the upcoming code cell, we will create the validation function. This function is essential for assessing the performance of our model on unseen data, ensuring its effectiveness and accuracy.


In [None]:
def validate(
    train_config: TrainingConfig, 
    model: nn.Module, 
    valid_loader: torch.utils.data.DataLoader,
    epoch_idx: int, 
    total_epochs: int
) -> Tuple[float, float]:
    """Evaluate the model on the validation loader and return its loss and accuracy.

    Args:
        train_config: supplies ``num_classes`` and ``device``.
        model: network to evaluate; it is switched to evaluation mode.
        valid_loader: iterable yielding ``(data, target)`` batches.
        epoch_idx: epoch number shown in the progress bar.
        total_epochs: total number of epochs shown in the progress bar.

    Returns:
        Tuple ``(valid_loss, valid_acc)`` as computed by the ``MeanMetric`` and
        ``MulticlassAccuracy`` trackers over the whole loader.
    """
    model.eval()

    target_device = train_config.device

    # Running trackers: sample-weighted mean loss and micro-averaged accuracy.
    loss_tracker = MeanMetric()
    accuracy_tracker = MulticlassAccuracy(num_classes=train_config.num_classes, average="micro")

    header = f"Valid:\t{bold}Epoch: {epoch_idx}/{total_epochs}{end}"
    prog_bar = tqdm(valid_loader, bar_format="{l_bar}{bar:10}{r_bar}{bar:-10b}")
    prog_bar.set_description(header)

    for inputs, labels in prog_bar:
        inputs = inputs.to(target_device)
        labels = labels.to(target_device)

        # Forward pass without gradient tracking.
        with torch.no_grad():
            logits = model(inputs)

        batch_loss = F.cross_entropy(logits, labels).item()

        # Weight the batch loss by the batch size.
        loss_tracker(batch_loss, weight=inputs.shape[0])

        # argmax over the logits gives the predicted class; metrics are updated on CPU.
        predicted = logits.detach().argmax(dim=1)
        accuracy_tracker(predicted.cpu(), labels.cpu())

        prog_bar.set_description(
            header + f" Valid Loss: {loss_tracker.compute():.4f}, Valid Acc: {accuracy_tracker.compute():.4f}"
        )

    valid_loss = loss_tracker.compute()
    valid_acc = accuracy_tracker.compute()

    prog_bar.close()

    return valid_loss, valid_acc

## <font style="color:blue">4.4. Save & Load Model</font>

The following two code cells are dedicated to essential functions in deep learning model management:

1. **Saving the Model Function**: This function is crucial for preserving the trained model state, allowing us to store the learned parameters for future use or further analysis.

2. **Loading the Model Function**: This function is designed to retrieve and load a previously saved model. It's vital for resuming training, making predictions, or conducting evaluations without having to retrain the model from scratch.


In [None]:
def save_model(model, device, model_dir="models", model_file_name="cat_dog_panda_classifier.pt"):
    """Save the model's ``state_dict`` to ``model_dir/model_file_name``.

    When ``device`` is a CUDA device, the model is moved to the CPU for saving
    and moved back to the same device afterwards.

    Args:
        model: module whose ``state_dict`` is saved.
        device: device the model currently lives on; a string such as ``"cuda"``
            or a ``torch.device``.
        model_dir: output directory, created if missing.
        model_file_name: file name inside ``model_dir``.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(model_dir, exist_ok=True)

    model_path = os.path.join(model_dir, model_file_name)

    # Normalize to torch.device: comparing a torch.device object with the string
    # "cuda" is False, which would skip the CPU transfer below.
    source_device = torch.device(device)
    on_gpu = source_device.type == "cuda"

    # Make sure you transfer the model to cpu.
    if on_gpu:
        model.to("cpu")

    try:
        # Save the 'state_dict'
        torch.save(model.state_dict(), model_path)
    finally:
        # Restore the model to the exact device it came from, even if saving failed.
        if on_gpu:
            model.to(source_device)

    return

In [None]:
def load_model(model, model_dir="models", model_file_name="cat_dog_panda_classifier.pt", device=torch.device("cpu")):
    """Load saved parameters from ``model_dir/model_file_name`` into ``model``.

    Args:
        model: module that receives the parameters via ``load_state_dict``.
        model_dir: directory containing the saved file.
        model_file_name: file name inside ``model_dir``.
        device: passed to ``torch.load`` as ``map_location``.

    Returns:
        The same ``model`` object, with the loaded parameters.
    """
    checkpoint_path = os.path.join(model_dir, model_file_name)

    saved_state = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(saved_state)

    return model

## <font style="color:blue">4.5. Logging Setup</font>

This function initializes the directories in which the TensorBoard logs and model checkpoints are saved, creating a new version folder for each training run.


In [None]:
def setup_log_directory(training_config=None):
    """Tensorboard Log and Model checkpoint directory Setup.

    Picks the next free ``version_<N>`` name under ``training_config.root_log_dir``
    (or ``training_config.log_dir`` when no version folder exists yet), rewrites
    ``log_dir`` and ``checkpoint_dir`` on the config to the full paths, and
    creates both directories.

    Args:
        training_config: configuration object to update in place. When omitted,
            a fresh ``TrainingConfig()`` is created for this call.

    Returns:
        Tuple ``(training_config, version_name)``.
    """
    # Build the default per call: a default instance created at definition time
    # would be shared and mutated by every call that omits the argument.
    if training_config is None:
        training_config = TrainingConfig()

    version_name = training_config.log_dir

    if os.path.isdir(training_config.root_log_dir):
        prefix = "version_"

        # Collect the numbers of existing "version_<N>" folders, ignoring anything
        # else in the directory (e.g. ".ipynb_checkpoints").
        folder_numbers = [
            int(folder[len(prefix):])
            for folder in os.listdir(training_config.root_log_dir)
            if folder.startswith(prefix) and folder[len(prefix):].isdecimal()
        ]

        # An existing but empty log root keeps the configured default name.
        if folder_numbers:
            version_name = f"version_{max(folder_numbers) + 1}"

    # Update the training config default directory.
    training_config.log_dir = os.path.join(training_config.root_log_dir, version_name)
    training_config.checkpoint_dir = os.path.join(training_config.root_checkpoint_dir, version_name)

    # Create new directory for saving new experiment version.
    os.makedirs(training_config.log_dir, exist_ok=True)
    os.makedirs(training_config.checkpoint_dir, exist_ok=True)

    print(f"Logging at: {training_config.log_dir}")
    print(f"Model Checkpoint at: {training_config.checkpoint_dir}")

    return training_config, version_name

## <font style="color:blue">4.6. Plot Loss and Accuracy</font>

The next code cell will focus on developing a function for plotting loss and accuracy graphs. This function is instrumental in visualizing the performance of the deep learning model throughout the training process, providing insights into its learning behavior by displaying trends in loss reduction and accuracy improvement over epochs.


In [None]:
def plot_loss_accuracy(
    train_loss,
    val_loss,
    train_acc,
    val_acc,
    colors,
    loss_legend_loc="upper center",
    acc_legend_loc="upper left",
    fig_size=(20, 10),
    sub_plot1=(1, 2, 1),
    sub_plot2=(1, 2, 2),
):
    """Plot loss and accuracy curves for one or more runs and save the figure.

    Args:
        train_loss: sequence of per-run training-loss histories.
        val_loss: sequence of per-run validation-loss histories.
        train_acc: sequence of per-run training-accuracy histories.
        val_acc: sequence of per-run validation-accuracy histories.
        colors: one colour name per run, used as ``tab:<name>``.
        loss_legend_loc: legend location for the loss panel.
        acc_legend_loc: legend location for the accuracy panel.
        fig_size: value written to ``plt.rcParams["figure.figsize"]``.
        sub_plot1: ``(rows, cols, index)`` of the loss panel.
        sub_plot2: ``(rows, cols, index)`` of the accuracy panel.

    Side effects:
        Changes the global matplotlib figure size, writes
        ``sample_loss_acc_plot.png`` and shows the figure.
    """
    plt.rcParams["figure.figsize"] = fig_size
    fig = plt.figure()

    # ---- Loss panel: solid line for training, dashed line for validation ----
    plt.subplot(sub_plot1[0], sub_plot1[1], sub_plot1[2])

    for run_idx, run_train_loss in enumerate(train_loss):
        run_val_loss = val_loss[run_idx]

        train_epochs = range(len(run_train_loss))
        val_epochs = range(len(run_val_loss))

        # The legend reports the lowest loss reached by each curve.
        lowest_train = min(run_train_loss)
        lowest_val = min(run_val_loss)

        plt.plot(train_epochs, run_train_loss, linestyle="-", color=f"tab:{colors[run_idx]}", label=f"TRAIN LOSS ({lowest_train:.4})")
        plt.plot(val_epochs, run_val_loss, linestyle="--", color=f"tab:{colors[run_idx]}", label=f"VALID LOSS ({lowest_val:.4})")

    plt.xlabel("epoch no.")
    plt.ylabel("loss")
    plt.legend(loc=loss_legend_loc)
    plt.title("Training and Validation Loss")

    # ---- Accuracy panel: same line styles ----
    plt.subplot(sub_plot2[0], sub_plot2[1], sub_plot2[2])

    for run_idx, run_train_acc in enumerate(train_acc):
        run_val_acc = val_acc[run_idx]

        train_epochs = range(len(run_train_acc))
        val_epochs = range(len(run_val_acc))

        # The legend reports the highest accuracy reached by each curve.
        highest_train = max(run_train_acc)
        highest_val = max(run_val_acc)

        plt.plot(
            train_epochs,
            run_train_acc,
            linestyle="-",
            color=f"tab:{colors[run_idx]}",
            label=f"TRAIN ACC ({highest_train:.4})",
        )
        plt.plot(
            val_epochs,
            run_val_acc,
            linestyle="--",
            color=f"tab:{colors[run_idx]}",
            label=f"VALID ACC ({highest_val:.4})",
        )

    plt.xlabel("epoch no.")
    plt.ylabel("accuracy")
    plt.legend(loc=acc_legend_loc)
    plt.title("Training and Validation Accuracy")

    fig.savefig("sample_loss_acc_plot.png")
    plt.show()

    return

## <font style="color:blue">4.7. Main Function for Training</font>

In this function, we integrate all the various functions we've previously defined, creating a cohesive and streamlined workflow.

In [None]:
def main(model, summary_writer, scheduler=None, system_config=SystemConfig(), training_config=TrainingConfig(), data_augmentation=True):
    """Train and validate ``model`` for the configured number of epochs.

    Each epoch runs ``train`` and ``validate``, logs the four scalars to
    ``summary_writer`` and saves the model's ``state_dict`` whenever the
    validation loss improves.

    Args:
        model: network to train; moved to CUDA when available, else CPU.
        summary_writer: object whose ``add_scalar`` receives the per-epoch metrics.
        scheduler: accepted but not used anywhere in this function.
        system_config: passed to ``setup_system``.
        training_config: supplies batch size, data root, image size, worker count,
            epoch count, learning rate and checkpoint location.
        data_augmentation: forwarded to ``get_data``.

    Returns:
        Tuple ``(epoch_train_loss, epoch_train_acc, epoch_valid_loss, epoch_valid_acc)``,
        each a list with one entry per epoch.
    """
    setup_system(system_config)

    # Data loaders for training and validation.
    train_loader, valid_loader = get_data(
        batch_size=training_config.batch_size,
        data_root=training_config.data_root,
        img_size=training_config.img_size,
        num_workers=training_config.num_workers,
        data_augmentation=data_augmentation,
    )

    num_epochs = training_config.epochs_count

    # Pick the acceleration device and move the model onto it.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=training_config.init_learning_rate)

    # Lowest validation loss seen so far; starts at infinity so the first epoch always saves.
    best_loss = torch.tensor(np.inf)

    # Per-epoch metric histories.
    epoch_train_loss, epoch_valid_loss = [], []
    epoch_train_acc, epoch_valid_acc = [], []

    checkpoint_path = os.path.join(training_config.checkpoint_dir, training_config.save_model_name)

    # Training time measurement.
    t_begin = time.time()

    for epoch in range(num_epochs):
        shown_epoch = epoch + 1
        train_loss, train_acc = train(training_config, model, optimizer, train_loader, shown_epoch, num_epochs)
        val_loss, val_accuracy = validate(training_config, model, valid_loader, shown_epoch, num_epochs)

        epoch_train_loss.append(train_loss)
        epoch_train_acc.append(train_acc)
        epoch_valid_loss.append(val_loss)
        epoch_valid_acc.append(val_accuracy)

        # Log with the zero-based epoch index as the step.
        for tag, value in (
            ("Loss/Train", train_loss),
            ("Accuracy/Train", train_acc),
            ("Loss/Validation", val_loss),
            ("Accuracy/Validation", val_accuracy),
        ):
            summary_writer.add_scalar(tag, value, epoch)

        # Checkpoint only when the validation loss improves.
        if val_loss < best_loss:
            best_loss = val_loss
            print(f"\nModel Improved... Saving Model ... ", end="")
            torch.save(model.state_dict(), checkpoint_path)
            print("Done.\n")

        print(f"{'='*72}\n")

    print(f"Total time: {(time.time() - t_begin):.2f}s, Best Loss: {best_loss:.3f}")

    return epoch_train_loss, epoch_train_acc, epoch_valid_loss, epoch_valid_acc

## <font style="color:blue">4.8. Define Model</font>

In this section, we will define the architecture of the Convolutional Neural Network (CNN) model and proceed to train it, setting the stage for learning and adapting to our specific dataset through rigorous training cycles.


In [None]:
'''
class MyModel(nn.Module):
    def __init__(self):
        super().__init__()

        # Convolution layers
        self._body = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=7),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
        )

        # Fully connected layers
        self._head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=64 * 52 * 52, out_features=100),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=100, out_features=3),
        )

    def forward(self, x):
        # Apply feature extractor
        x = self._body(x)

        # Apply classification head
        x = self._head(x)

        return x
'''

class MyModel(nn.Module):
    """Convolutional classifier: four conv stages followed by a pooled linear head.

    Each stage is ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU -> MaxPool2d(2)``,
    with 32, 64, 128 and 256 output channels. The head applies adaptive average
    pooling to 1x1, flattens, and maps 256 -> 512 -> ``num_classes`` with ReLU
    and dropout in between.

    Args:
        num_classes: number of outputs of the final linear layer.
        dropout_prob: probability passed to ``nn.Dropout`` in the head.
    """

    def __init__(self, num_classes=3, dropout_prob=0.5):
        super().__init__()

        # Feature extractor.
        self._body = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),  # smaller kernel, preserve spatial
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )

        # Flattened size of the body output for a 224x224 input. Informational only:
        # the head pools to 1x1 first, so its first linear layer always sees 256 features.
        self._flattened_size = self._get_flattened_size()

        # Classification head.
        self._head = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(256, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_prob),
            nn.Linear(512, num_classes),
        )

    # The two methods below must live at class level. Nested inside __init__ they
    # were plain local functions, so `self._get_flattened_size()` raised
    # AttributeError and the module had no `forward`.
    def _get_flattened_size(self):
        """Return the number of features the body produces for one 3x224x224 input."""
        # Run the dummy pass in eval mode so the BatchNorm running statistics
        # are not updated by an all-zero input, then restore the previous mode.
        was_training = self._body.training
        self._body.eval()
        try:
            with torch.no_grad():
                dummy = torch.zeros(1, 3, 224, 224)
                out = self._body(dummy)
        finally:
            self._body.train(was_training)
        return out.view(1, -1).shape[1]

    def forward(self, x):
        """Apply the feature extractor, then the classification head, and return the logits."""
        x = self._body(x)
        x = self._head(x)
        return x


## <font style="color:blue">4.9. Training</font>


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from torchmetrics.classification import MulticlassAccuracy
from torchmetrics import MeanMetric
from tqdm import tqdm

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Build the network with its default arguments and print its layer structure.
model = MyModel()
print(model)

# Training settings object; TrainingConfig is defined elsewhere in this notebook.
training_config = TrainingConfig()

# Model checkpoint log dir setup.
# setup_log_directory hands back a config (rebinding `training_config`) together
# with a name for the current run — presumably a version label; confirm in the helper.
training_config, current_version_name = setup_log_directory(training_config)

# Tensorboard log dir setup: the writer logs into training_config.log_dir.
summary_writer = SummaryWriter(training_config.log_dir)

In [None]:
from torchvision import transforms, datasets

In [None]:
# Train and Validate
# main() is defined elsewhere in this notebook. Its four return values are
# reused by the loss/accuracy plot in the next section (presumably per-epoch
# histories — confirm against main()).
train_loss, train_acc, val_loss, val_acc = main(
    model,
    summary_writer=summary_writer,
    scheduler=None,  # no learning-rate scheduler is passed for this run
    system_config=SystemConfig(),
    training_config=training_config,
    data_augmentation=False,  # augmentation is switched off for this run
)

## <font style="color:blue">4.10. Loss and Accuracy Plot</font>


In [None]:
# Plot the training/validation loss and accuracy returned by main().
# Each series is wrapped in a one-element list and paired with one colour —
# presumably plot_loss_accuracy draws one curve per list entry; confirm in the helper.
plot_loss_accuracy(
    train_loss=[train_loss],
    val_loss=[val_loss],
    train_acc=[train_acc],
    val_acc=[val_acc],
    colors=["blue"],
    loss_legend_loc="upper center",
    acc_legend_loc="upper left",
)

# <font style="color:blue">Step 5: Sample Prediction</font><a name="predictions"></a>

Show some sample predictions.


## <font style="color:blue">5.1. Make Predictions</font>


In [None]:
def prediction(model, device, batch_input):
    """Return the predicted class index and its probability for each sample.

    Args:
        model: Callable that maps the batch to per-class scores (classes on dim 1).
        device: Device the batch is moved to before the forward pass.
        batch_input: Tensor holding the batch of inputs.

    Returns:
        Tuple ``(pred_index, pred_prob)`` of NumPy arrays: the index of the
        highest-probability class and that probability, one entry per sample.
    """
    with torch.no_grad():
        scores = model(batch_input.to(device))
        # Score to probability using softmax (kept inside no_grad: inference only).
        probabilities = F.softmax(scores, dim=1)

    # A single max() over the class dimension yields both the top probability
    # and the index it was found at.
    top_prob, top_index = probabilities.max(dim=1)

    return top_index.cpu().numpy(), top_prob.cpu().numpy()

## <font style="color:blue">5.2. Get Predictions on a Batch</font>


In [None]:
def get_sample_prediction(model, data_root, img_size, mean, std):
    """Plot predictions for up to 15 evenly spaced images of an image folder.

    The folder is loaded twice: once with ``image_common_transforms`` (the
    tensors fed to the model) and once with ``image_preprocess_transforms``
    (the images that are displayed). Each subplot title shows the predicted
    class with its probability (``P:``) and the true class (``T:``).

    Side effects: switches ``model`` to eval mode, moves it to CUDA when
    available (CPU otherwise), and changes the global matplotlib style and
    ``figure.figsize`` rcParam.

    Args:
        model: Network to evaluate.
        data_root: Root directory in ``torchvision.datasets.ImageFolder`` layout.
        img_size: Forwarded to both transform factories.
        mean: Forwarded to ``image_common_transforms``.
        std: Forwarded to ``image_common_transforms``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    batch_size = 15

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # It is important to do model.eval() before prediction.
    model.eval()

    # Send model to cpu/cuda according to your system configuration.
    model.to(device)

    # Transformed data
    valid_dataset_trans = datasets.ImageFolder(root=data_root, transform=image_common_transforms(img_size, mean, std))

    # Original image dataset
    valid_dataset = datasets.ImageFolder(root=data_root, transform=image_preprocess_transforms(img_size))

    data_len = len(valid_dataset)

    # Spread the samples over the whole dataset. With fewer images than
    # batch_size, a stride of 0 would pick the first image every time, so the
    # stride is at least 1 and the sample count is capped at the dataset size.
    interval = max(1, data_len // batch_size)
    sample_count = min(batch_size, data_len)

    imgs = []
    inputs = []
    targets = []
    for i in range(sample_count):
        index = i * interval
        trans_input, target = valid_dataset_trans[index]
        img, _ = valid_dataset[index]

        imgs.append(img)
        inputs.append(trans_input)
        targets.append(target)

    inputs = torch.stack(inputs)

    cls, prob = prediction(model, device, batch_input=inputs)

    plt.style.use("default")
    plt.rcParams["figure.figsize"] = (15, 9)
    plt.figure()

    for i, (img, target) in enumerate(zip(imgs, targets)):
        plt.subplot(3, 5, i + 1)
        plt.imshow(transforms.functional.to_pil_image(img))
        plt.gca().set_title(f"P:{valid_dataset.classes[cls[i]]}({prob[i]:.2}), T:{valid_dataset.classes[target]}")
    plt.show()

## <font style="color:blue">5.3. Load Model and Run Inference</font>


Next, we will reload the best saved model and use the `get_sample_prediction` function to make some sample predictions. This step is instrumental in visually assessing the performance of our model on the validation dataset, providing a quick and practical insight into how well our model generalizes to new, unseen data.


In [None]:
# Create a fresh network and hand it to load_model together with the checkpoint
# directory and file name from the training config (load_model is defined
# elsewhere; presumably it restores the saved weights — confirm).
trained_model = MyModel()
trained_model = load_model(
    trained_model, 
    model_dir=training_config.checkpoint_dir, 
    model_file_name=training_config.save_model_name
)

# The "Train" and "Valid" folders live directly under the configured data root.
train_data_path = os.path.join(training_config.data_root, "Train")
valid_data_path = os.path.join(training_config.data_root, "Valid")

# Statistics come from the training folder and are then used for the validation images.
mean, std = get_mean_std(train_data_path, img_size=training_config.img_size)

# Plot sample predictions of the reloaded model on the validation folder.
get_sample_prediction(trained_model, valid_data_path, img_size=training_config.img_size, mean=mean, std=std)

Despite training the network on a very small subset of the data, you can see that the predictions are not that bad. This means that our pipeline works end to end and the model is ready to be trained on the full dataset.


# <font style="color:red">Step 6. Display Mistakes [15 points] </font><a name="display"></a>

The code shown above demonstrates sample predictions. However, since correct predictions don't highlight areas for improvement, it's essential to focus on the errors. Therefore, let's write a similar function specifically designed to display only the mispredictions made by the network, allowing us to identify and address the shortcomings in our model.

**You have to display only 15 images.**


In [None]:
# Build the train/validation data loaders from the training configuration.
# get_data is defined elsewhere in this notebook; augmentation is disabled here.
train_loader, valid_loader = get_data(
    batch_size=training_config.batch_size,
    data_root=training_config.data_root,
    img_size=training_config.img_size,
    num_workers=training_config.num_workers,
    data_augmentation=False
)

In [None]:
# Show up to 15 randomly chosen validation images that the model gets wrong.
# NOTE(review): display_misclassified_images is defined in the NEXT cell, so on
# a fresh kernel (Restart & Run All) this call raises NameError — run or move
# the defining cell above this one.
# NOTE(review): mean/std are hard-coded literals, presumably the training-set
# statistics; confirm they match the values the loader normalizes with.
display_misclassified_images(
    model=model,
    dataloader=valid_loader,
    class_names=["cat", "dog", "panda"],
    device=training_config.device,
    mean=(0.4573, 0.4348, 0.3884),
    std=(0.2686, 0.2601, 0.2600),
    sample_size=15,
    collect_limit=500  # collect at most this many mistakes before sampling; raise it for a deeper look
)

In [None]:
import torch
import matplotlib.pyplot as plt
import torchvision.transforms.functional as TF
import random
from collections import Counter

def denormalize(tensor, mean, std):
    """Undo per-channel normalization: ``channel * std + mean``.

    Args:
        tensor: Image tensor whose first dimension indexes the channels.
        mean: Per-channel means that were subtracted during normalization.
        std: Per-channel standard deviations that were divided by.

    Returns:
        A new tensor of the same shape. The input tensor is left unmodified.
        Channels beyond the length of ``mean``/``std`` are returned unchanged.
    """
    # Work on a copy so the caller's tensor is never modified as a side effect.
    result = tensor.clone()
    # Iterating a tensor yields views of its first dimension, so the in-place
    # ops below write straight into `result`.
    for channel, m, s in zip(result, mean, std):
        channel.mul_(s).add_(m)
    return result

def display_misclassified_images(model, dataloader, class_names, device="cpu", mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0), sample_size=15, collect_limit=100):
    """Show a random sample of the images that ``model`` misclassifies.

    Batches from ``dataloader`` are scanned until ``collect_limit`` mistakes
    are gathered (or the loader is exhausted). A per-true-class breakdown is
    printed, then up to ``sample_size`` mistakes are drawn at random,
    de-normalized with ``mean``/``std`` and plotted five per row.

    Side effects: switches ``model`` to eval mode and moves it to ``device``.

    Args:
        model: Network that returns per-class scores with classes on dim 1.
        dataloader: Iterable of ``(images, labels)`` batches.
        class_names: Sequence mapping a class index to its display name.
        device: Device used for inference.
        mean: Per-channel means used to normalize the images.
        std: Per-channel standard deviations used to normalize the images.
        sample_size: Maximum number of mistakes to display.
        collect_limit: Stop scanning once this many mistakes are collected.

    Returns:
        None. Prints a message and returns early when there are no mistakes.
    """
    model.eval()
    model.to(device)

    misclassified = []

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            preds = model(images).argmax(dim=1)

            mismatches = preds != labels
            if mismatches.any():
                for img, true, pred in zip(images[mismatches], labels[mismatches], preds[mismatches]):
                    # Store labels as plain ints so they index class_names directly.
                    misclassified.append((img.cpu(), int(true), int(pred)))
                    if len(misclassified) >= collect_limit:
                        break
            if len(misclassified) >= collect_limit:
                break

    if not misclassified:
        print("🎉 No misclassifications found!")
        return

    # Optional: Breakdown by true class
    label_counts = Counter(class_names[true] for _, true, _ in misclassified)
    print("🔍 Misclassification breakdown by true label:")
    for label, count in label_counts.items():
        print(f"  {label}: {count}")

    # Randomly sample up to `sample_size` mistakes to display
    sampled = random.sample(misclassified, k=min(sample_size, len(misclassified)))

    # Five images per row. Keep the 3x5 layout for up to 15 images and add
    # rows (and figure height) beyond that, so a larger sample_size still fits.
    n_cols = 5
    n_rows = max(3, (len(sampled) + n_cols - 1) // n_cols)

    # Plot
    plt.figure(figsize=(15, 10 * n_rows / 3))
    for idx, (img, true, pred) in enumerate(sampled):
        img = denormalize(img.clone(), mean, std)
        img = torch.clamp(img, 0, 1)
        img = TF.to_pil_image(img)

        plt.subplot(n_rows, n_cols, idx + 1)
        plt.imshow(img)
        plt.title(f"True: {class_names[true]}\nPred: {class_names[pred]}")
        plt.axis("off")

    # Report how many images are actually shown (can be fewer than sample_size).
    plt.suptitle(f"Random {len(sampled)} Misclassified Images", fontsize=18)
    plt.tight_layout()
    plt.show()


In [None]:
import torch
import torch.nn.functional as F
from torchmetrics.classification import MulticlassAccuracy
from torchmetrics import MeanMetric

def evaluate_model(model, dataloader, num_classes=3, device="cpu"):
    """Compute and print micro-averaged accuracy and mean cross-entropy loss.

    NOTE(review): a later cell in this notebook defines another
    ``evaluate_model(model, dataloader, config)``; once that cell runs it
    replaces this function.

    Side effects: switches ``model`` to eval mode and moves it to ``device``.

    Args:
        model: Network that returns per-class scores with classes on dim 1.
        dataloader: Iterable of ``(images, labels)`` batches.
        num_classes: Number of classes passed to ``MulticlassAccuracy``.
        device: Device used for inference; the metrics are updated on the CPU.

    Returns:
        Tuple ``(acc, avg_loss)`` as returned by the metrics' ``compute()``.
    """
    model.eval()
    model.to(device)

    acc_metric = MulticlassAccuracy(num_classes=num_classes, average="micro")
    loss_metric = MeanMetric()

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = F.cross_entropy(outputs, labels)

            preds = outputs.argmax(dim=1)
            acc_metric.update(preds.cpu(), labels.cpu())
            # cross_entropy returns the mean over the batch. Weight it by the
            # batch size so a smaller final batch does not skew the average.
            loss_metric.update(loss.item(), weight=labels.size(0))

    acc = acc_metric.compute()
    avg_loss = loss_metric.compute()

    print(f"✅ Model Accuracy: {acc:.4f} ({acc*100:.2f}%)")
    print(f"📉 Average Loss: {avg_loss:.4f}")

    return acc, avg_loss


In [None]:
import torch
import torch.nn.functional as F
from torchmetrics.classification import MulticlassAccuracy
from torchmetrics import MeanMetric

def evaluate_model(model, dataloader, config):
    """
    Evaluate the model on the given dataloader and print accuracy and loss.

    Side effects: switches ``model`` to eval mode and moves it to
    ``config.device``.

    Args:
        model: Network that returns per-class scores with classes on dim 1.
        dataloader: Iterable of ``(images, labels)`` batches.
        config: Object providing ``device`` and ``num_classes`` attributes.

    Returns:
        Tuple ``(acc_value, loss_value)`` of Python floats: micro-averaged
        accuracy and the per-sample mean cross-entropy loss.
    """
    model.eval()
    model.to(config.device)

    # Keep the metric state on the same device as the predictions.
    accuracy = MulticlassAccuracy(num_classes=config.num_classes, average="micro").to(config.device)
    mean_loss = MeanMetric().to(config.device)

    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(config.device), labels.to(config.device)

            outputs = model(images)
            loss = F.cross_entropy(outputs, labels)
            preds = outputs.argmax(dim=1)

            accuracy.update(preds, labels)
            # cross_entropy returns the mean over the batch. Weight it by the
            # batch size so a smaller final batch does not skew the average.
            mean_loss.update(loss, weight=labels.size(0))

    acc_value = accuracy.compute().item()
    loss_value = mean_loss.compute().item()

    print(f"✅ Model Accuracy: {acc_value:.4f} ({acc_value * 100:.2f}%)")
    print(f"📉 Average Loss: {loss_value:.4f}")

    return acc_value, loss_value


In [None]:
# After training is done: report accuracy and loss on the validation loader.
# NOTE(review): `config` is not defined in the cells visible around this one
# (the training settings object is named `training_config`); confirm the
# intended object and that it exposes both `device` and `num_classes`.
evaluate_model(model, valid_loader, config)
