## Step 0. Folder Structure
```
3. Deep Learning Project/
├── dataset_split/
│   ├── train/
│   │   ├── african_elephant (780 images)
│   │   ├── airliner (780 images)
│   │   └── ... (8 more folders)
│   ├── val/
│   │   ├── african_elephant (260 images)
│   │   ├── airliner (260 images)
│   │   └── ... (8 more folders)
│   └── test (2600 images)
├── data_preprocessed/
│   ├── labels_train.pt
│   ├── labels_val.pt
│   ├── tensor_test.pt
│   ├── tensor_train.pt
│   └── tensor_val.pt
├── models/
│   ├── resnet18_checkpoint.pkl
│   └── resnet34_checkpoint.pkl
├── src/
│   └── part3.ipynb
├── src_datasplit/
│   └── data_split.ipynb
├── MSA.yaml
├── README.md
└── IEEE_Report_Template.docx
```


In [1]:
from typing import Tuple, List
import os
import cv2
from tqdm import tqdm
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
import copy
import matplotlib.pyplot as plt
import csv

## Step 1 Preprocessing
1. Load images and labels from folders into lists.
2. Resize the images.
3. Convert the image data type as float32 and limit the range in [0, 1].
4. Convert data from NHWC to NCHW and creat a PyTorch tensor from it.
5. Per-channel standardization of the dataset using training set statistics (mean and std computed independently for each color channel).
6. Saving the pre-processed sets as .pt files in the correct directory.
7. Load the pre-processed sets from .pt files. 

In [2]:
import os
from typing import Tuple, List
import numpy as np
import cv2

def image_loader(set_dir: str) -> Tuple[List[np.ndarray], List[str]]:
    """
    Load all images from a dataset directory (train/val/test) into memory.
    """
    images = []
    labels = []

    # Check if it's a test set (i.e., no subdirectories)
    if all(os.path.isfile(os.path.join(set_dir, f)) for f in os.listdir(set_dir)):
        # Test set: load files in sorted order by filename
        file_names = sorted(os.listdir(set_dir), key=lambda x: int(os.path.splitext(x)[0]))
        for fname in file_names:
            img_path = os.path.join(set_dir, fname)
            img = cv2.imread(img_path)
            if img is not None:
                images.append(img)
        labels = []  # test set has no labels

    else:
        # Train/val set: subfolders = class names
        for class_name in sorted(os.listdir(set_dir)):
            class_dir = os.path.join(set_dir, class_name)
            if os.path.isdir(class_dir):
                for fname in sorted(os.listdir(class_dir)):
                    img_path = os.path.join(class_dir, fname)
                    img = cv2.imread(img_path)
                    if img is not None:
                        images.append(img)
                        labels.append(class_name)

    return images, labels

In [3]:
def img_resize(images: List[np.ndarray], target_size: int = 224) -> np.ndarray:
    """
    Resize images with the shortest edge to target_size and perform center cropping to square dimensions.
    """
    if not images:
        return np.array([])

    processed_images = []
    for img in images:
        if img is None or len(img.shape) != 3 or img.shape[2] != 3:
            raise ValueError("Each image must be in HWC format with 3 channels (BGR).")

        h, w = img.shape[:2]
        
        # Compute scale factor to resize shortest side to target_size
        scale = target_size / min(h, w)
        new_h, new_w = int(h * scale), int(w * scale)

        # Resize with preserved aspect ratio
        resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)

        # Center crop to target_size x target_size
        start_x = (new_w - target_size) // 2
        start_y = (new_h - target_size) // 2
        cropped = resized[start_y:start_y + target_size, start_x:start_x + target_size]

        processed_images.append(cropped)

    return np.stack(processed_images)


In [4]:
# Load images

images_train, labels_train = image_loader("../dataset_split/train")
images_val, labels_val = image_loader("../dataset_split/val")
images_test, _ = image_loader("../dataset_split/test")

In [None]:
# Resize the images

images_train = img_resize(images_train, 224)
images_val = img_resize(images_val, 224)
images_test = img_resize(images_test, 224)

ValueError: all input arrays must have the same shape

In [None]:
# TODO: 3. Convert the image data type as float32 and limit the range in [0, 1].

images_train = images_train.astype(np.float32) / 255.0
images_val = images_val.astype(np.float32) / 255.0
images_test = images_test.astype(np.float32) / 255.0

AttributeError: 'list' object has no attribute 'astype'

In [None]:
# TODO: 4. Convert data from NHWC to NCHW and creat a PyTorch tensor from it.

tensor_train = torch.from_numpy(images_train).permute(0, 3, 1, 2).float()
tensor_val = torch.from_numpy(images_val).permute(0, 3, 1, 2).float()
tensor_test = torch.from_numpy(images_test).permute(0, 3, 1, 2).float()

In [None]:
# TODO: 5. Per-channel standardization of the dataset using training set statistics (mean and std computed independently for each color channel).

train_mean = tensor_train.mean(dim=(0, 2, 3))
train_std = tensor_train.std(dim=(0, 2, 3))

tensor_train = (tensor_train - train_mean[None, :, None, None]) / train_std[None, :, None, None]
tensor_val = (tensor_val - train_mean[None, :, None, None]) / train_std[None, :, None, None]
tensor_test = (tensor_test - train_mean[None, :, None, None]) / train_std[None, :, None, None]


In [None]:
# TODO: 6. Save the pre-processed sets as .pt files.

# 6.1 Save the preprocessed tensors
torch.save(tensor_train, "tensor_train.pt")
torch.save(tensor_val, "tensor_val.pt")
torch.save(tensor_test, "tensor_test.pt")

# 6.2 Create unique label list and mappings
unique_labels = sorted(set(labels_train))  # assuming labels_train was loaded earlier
label_to_index = {label: idx for idx, label in enumerate(unique_labels)}
index_to_label = {idx: label for label, idx in label_to_index.items()}

# 6.3 Map string labels to integer indices
labels_train_index = [label_to_index[label] for label in labels_train]
labels_val_index = [label_to_index[label] for label in labels_val]

# Convert to PyTorch tensors
labels_train_index = torch.tensor(labels_train_index, dtype=torch.long)
labels_val_index = torch.tensor(labels_val_index, dtype=torch.long)

# 6.4 Save label-related mappings and indices
torch.save(labels_train_index, "labels_train_index.pt")
torch.save(labels_val_index, "labels_val_index.pt")
torch.save(label_to_index, "label_to_index.pt")
torch.save(index_to_label, "index_to_label.pt")


In [None]:
# TODO: 7. Load the pre-processed sets from .pt files. The data can be used directly in future without pre-processing once more by loading from the folder.

tensor_train = torch.load("tensor_train.pt")
tensor_val = torch.load("tensor_val.pt")
tensor_test = torch.load("tensor_test.pt")

labels_train_index = torch.load("labels_train_index.pt")
labels_val_index = torch.load("labels_val_index.pt")

# Step 2 Train the teacher model (ResNet34)
1. Set hyperparameters for the training process.
2. Create a pretrained ResNet34.
3. Define the optimizer, loss function, etc.
4. Train ResNet34 and save the best-performing parameters to 'resnet34_checkpoint.pkl'.
5. Plot the loss and accuracy curves.

In [None]:
# Verify data shapes

num_classes = max(labels_train_index) + 1
print(num_classes)
print(tensor_train.shape)
print(tensor_val.shape)
print(tensor_test.shape)
print(labels_train_index.shape)
print(labels_val_index.shape)

In [None]:
# TODO: 1. Set hyperparameters for the training process.

MAX_EPOCH = 
INIT_LR = 
BATCH_SIZE = 

# Specify the training device (automatically detects GPU if available)
# Usage example: 
#   tensor = tensor.to(device)  # Moves tensor to selected device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# TODO: 2. Create a pretrained ResNet34.

# 2.1 Use torchvision to create a pretrained ResNet34.
resnet34 = 

# 2.2 Modify the output layer so that the model aligns with this project.

# 2.2 Move the model to selected device.


In [None]:
# TODO: 3. Define the optimizer, loss function, etc.

optimizer = 
criterion = 

In [None]:
# TODO: 4. Train ResNet34 and save the best-performing parameters to 'resnet34_checkpoint.pkl'.
# Hint: May not need too many epochs as this is a pretrained model.

train_accs_resnet34, train_losses_resnet34, val_accs_resnet34, val_losses_resnet34 = [], [], [], []


In [None]:
# TODO: 5. Plot the loss and accuracy curves.


# Step 3 Train the student model (ResNet18)
1. Set hyperparameters for the distillation process.
2. Create a NON-PRETRAINED ResNet18 and move the model to selected device.
3. Load the saved ResNet34 and move the model to selected device.
4. Define the optimizer, loss function, etc.
5. Train ResNet18 and save the best-performing parameters to 'resnet18_checkpoint.pkl'.
6. Plot the loss and accuracy curves.

In [None]:
# Verify data shapes

num_classes = max(labels_train_index) + 1
print(num_classes)
print(tensor_train.shape)
print(tensor_val.shape)
print(tensor_test.shape)
print(labels_train_index.shape)
print(labels_val_index.shape)

In [None]:
# TODO: 1. Set hyperparameters for the distillation process.

MAX_EPOCH =
INIT_LR =
BATCH_SIZE =
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

TEMPERATURE = 
LOSS_RATIO = 

In [None]:
# TODO: 2. Create a NON-PRETRAINED ResNet18 and move the model to selected device.

resnet18 = 

In [None]:
# TODO: 3. Load the saved ResNet34 and move the model to selected device.
# Hint: Enable the evaluation mode to prevent updating the parameters.

resnet34 = 

In [None]:
# TODO: 4. Define the optimizer, loss function, etc.

optimizer = 
scheduler = 

In [None]:
# TODO: 5. Train ResNet18 and save the best-performing parameters to 'resnet18_checkpoint.pkl'.
# Hint: Use the correct loss function for knowledge distillation.
#       Details can be fund in https://docs.pytorch.org/tutorials/beginner/knowledge_distillation_tutorial.html.

train_accs_resnet18, train_losses_resnet18, val_accs_resnet18, val_losses_resnet18 = [], [], [], []


In [None]:
# TODO: 6. Plot the loss and accuracy curves.



# Step 4 Predict labels in the testing set
1. Load the saved ResNet18 and move it to the selected device.
2. Do prediction using the images in the testing set.
3. Write the results into the CSV file for submission.

In [None]:
# TODO: 1. Load the saved ResNet18 and move it to the selected device.
# Hint: Enable the evaluation mode to prevent updating the parameters.

resnet18 = 


In [None]:
# TODO: 2. Do prediction using the images in the testing set.
predictions = []


In [None]:
# TODO: 3. Write the results into the CSV file for submission.
# Hint: The CSV file should be in the format of 'file_name' and 'label'.
#       The submission.csv should look like:
#           file_name,label
#           0.jpg,sunglasses
#           ...

index_to_label = 
