## Step 0. Folder Structure
```
3. Deep Learning Project/
├── dataset_split/
│   ├── train/
│   │   ├── african_elephant (780 images)
│   │   ├── airliner (780 images)
│   │   └── ... (8 more folders)
│   ├── val/
│   │   ├── african_elephant (260 images)
│   │   ├── airliner (260 images)
│   │   └── ... (8 more folders)
│   └── test (2600 images)
├── data_preprocessed/
│   ├── labels_train.pt
│   ├── labels_val.pt
│   ├── tensor_test.pt
│   ├── tensor_train.pt
│   └── tensor_val.pt
├── models/
│   ├── resnet18_checkpoint.pkl
│   └── resnet34_checkpoint.pkl
├── src/
│   └── part3.ipynb
├── src_datasplit/
│   └── data_split.ipynb
├── MSA.yaml
├── README.md
└── IEEE_Report_Template.docx
```


In [1]:
from typing import Tuple, List
import os
import cv2
from tqdm import tqdm
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
import copy
import matplotlib.pyplot as plt
import csv

## Step 1 Preprocessing
1. Load images and labels from folders into lists.
2. Resize the images.
3. Convert the image data type as float32 and limit the range in [0, 1].
4. Convert data from NHWC to NCHW and creat a PyTorch tensor from it.
5. Per-channel standardization of the dataset using training set statistics (mean and std computed independently for each color channel).
6. Saving the pre-processed sets as .pt files in the correct directory.
7. Load the pre-processed sets from .pt files. 

In [3]:
def image_loader(set_dir: str) -> Tuple[List[np.ndarray], List[str]]:
    """
    Load all images from a dataset directory (train/val/test) into memory.
    """
    images = []
    labels = []

    # Check if it's a test set (i.e., no subdirectories)
    if all(os.path.isfile(os.path.join(set_dir, f)) for f in os.listdir(set_dir)):
        # Test set: load files in sorted order by filename
        file_names = sorted(os.listdir(set_dir), key=lambda x: int(os.path.splitext(x)[0]))
        for fname in file_names:
            img_path = os.path.join(set_dir, fname)
            img = cv2.imread(img_path)
            if img is not None:
                images.append(img)
        labels = []  # test set has no labels

    else:
        # Train/val set: subfolders = class names
        for class_name in sorted(os.listdir(set_dir)):
            class_dir = os.path.join(set_dir, class_name)
            if os.path.isdir(class_dir):
                for fname in sorted(os.listdir(class_dir)):
                    img_path = os.path.join(class_dir, fname)
                    img = cv2.imread(img_path)
                    if img is not None:
                        images.append(img)
                        labels.append(class_name)

    return images, labels

In [2]:
def img_resize(images: List[np.ndarray], target_size: int = 224) -> np.ndarray:
    processed = []
    for img in images:
        h, w = img.shape[:2]
        scale = target_size / min(h, w)
        new_h, new_w = int(h * scale), int(w * scale)
        resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)

        # Calculate crop start with floor
        start_x = max((new_w - target_size) // 2, 0)
        start_y = max((new_h - target_size) // 2, 0)
        cropped = resized[start_y:start_y + target_size, start_x:start_x + target_size]

        # Pad if cropped too small (due to rounding edge-case)
        pad_h = target_size - cropped.shape[0]
        pad_w = target_size - cropped.shape[1]
        if pad_h > 0 or pad_w > 0:
            cropped = cv2.copyMakeBorder(
                cropped,
                0, pad_h, 0, pad_w,
                borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0]
            )

        assert cropped.shape == (target_size, target_size, 3), f"Bad shape: {cropped.shape}"
        processed.append(cropped)

    return np.stack(processed)


In [4]:
# Load images

images_train, labels_train = image_loader("../dataset_split/train")
images_val, labels_val = image_loader("../dataset_split/val")
images_test, _ = image_loader("../dataset_split/test")

In [None]:
# Resize the images

images_train = img_resize(images_train, 224)
images_val = img_resize(images_val, 224)
images_test = img_resize(images_test, 224)

In [None]:
# TODO: 3. Convert the image data type as float32 and limit the range in [0, 1].

images_train = images_train.astype(np.float32) / 255.0
images_val = images_val.astype(np.float32) / 255.0
images_test = images_test.astype(np.float32) / 255.0

In [7]:
# TODO: 4. Convert data from NHWC to NCHW and creat a PyTorch tensor from it.

tensor_train = torch.from_numpy(images_train).permute(0, 3, 1, 2).float()
tensor_val = torch.from_numpy(images_val).permute(0, 3, 1, 2).float()
tensor_test = torch.from_numpy(images_test).permute(0, 3, 1, 2).float()

In [8]:
# TODO: 5. Per-channel standardization of the dataset using training set statistics (mean and std computed independently for each color channel).

train_mean = tensor_train.mean(dim=(0, 2, 3))
train_std = tensor_train.std(dim=(0, 2, 3))

tensor_train = (tensor_train - train_mean[None, :, None, None]) / train_std[None, :, None, None]
tensor_val = (tensor_val - train_mean[None, :, None, None]) / train_std[None, :, None, None]
tensor_test = (tensor_test - train_mean[None, :, None, None]) / train_std[None, :, None, None]


In [9]:
# TODO: 6. Save the pre-processed sets as .pt files.

# 6.1 Save the preprocessed tensors
torch.save(tensor_train, "tensor_train.pt")
torch.save(tensor_val, "tensor_val.pt")
torch.save(tensor_test, "tensor_test.pt")

# 6.2 Create unique label list and mappings
unique_labels = sorted(set(labels_train))  # assuming labels_train was loaded earlier
label_to_index = {label: idx for idx, label in enumerate(unique_labels)}
index_to_label = {idx: label for label, idx in label_to_index.items()}

# 6.3 Map string labels to integer indices
labels_train_index = [label_to_index[label] for label in labels_train]
labels_val_index = [label_to_index[label] for label in labels_val]

# Convert to PyTorch tensors
labels_train_index = torch.tensor(labels_train_index, dtype=torch.long)
labels_val_index = torch.tensor(labels_val_index, dtype=torch.long)

# 6.4 Save label-related mappings and indices
torch.save(labels_train_index, "labels_train_index.pt")
torch.save(labels_val_index, "labels_val_index.pt")
torch.save(label_to_index, "label_to_index.pt")
torch.save(index_to_label, "index_to_label.pt")


In [3]:
# TODO: 7. Load the pre-processed sets from .pt files. The data can be used directly in future without pre-processing once more by loading from the folder.

tensor_train = torch.load("tensor_train.pt")
tensor_val = torch.load("tensor_val.pt")
tensor_test = torch.load("tensor_test.pt")

labels_train_index = torch.load("labels_train_index.pt")
labels_val_index = torch.load("labels_val_index.pt")

  tensor_train = torch.load("tensor_train.pt")
  tensor_val = torch.load("tensor_val.pt")
  tensor_test = torch.load("tensor_test.pt")
  labels_train_index = torch.load("labels_train_index.pt")
  labels_val_index = torch.load("labels_val_index.pt")


# Step 2 Train the teacher model (ResNet34)
1. Set hyperparameters for the training process.
2. Create a pretrained ResNet34.
3. Define the optimizer, loss function, etc.
4. Train ResNet34 and save the best-performing parameters to 'resnet34_checkpoint.pkl'.
5. Plot the loss and accuracy curves.

In [4]:
# Verify data shapes

num_classes = max(labels_train_index) + 1
print(num_classes)
print(tensor_train.shape)
print(tensor_val.shape)
print(tensor_test.shape)
print(labels_train_index.shape)
print(labels_val_index.shape)

tensor(10)
torch.Size([7800, 3, 224, 224])
torch.Size([2600, 3, 224, 224])
torch.Size([2600, 3, 224, 224])
torch.Size([7800])
torch.Size([2600])


In [5]:
# Hyperparameters
MAX_EPOCH = 5  # Number of epochs to train the model
INIT_LR = 1e-3  # Initial learning rate
BATCH_SIZE = 64  # Number of samples per batch


# Specify the training device (automatically detects GPU if available)
# Usage example: 
#   tensor = tensor.to(device)  # Moves tensor to selected device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
# TODO: 2. Create a pretrained ResNet34.

# Load the pretrained ResNet-34 model
resnet34 = torchvision.models.resnet34(weights=torchvision.models.ResNet34_Weights.IMAGENET1K_V1)

# Define the number of output classes (adjust as per your dataset)
num_classes = 10

# Modify the final fully connected layer to match the number of classes
resnet34.fc = nn.Linear(resnet34.fc.in_features, num_classes)

# Specify the training device (automatically detects GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the selected device
resnet34.to(device)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
# TODO: 3. Define the optimizer, loss function, etc.

optimizer = torch.optim.Adam(resnet34.parameters(), lr=0.001)

criterion = nn.CrossEntropyLoss()

In [11]:
best_acc = 0.0
best_model_wts = copy.deepcopy(resnet34.state_dict())

train_accs_resnet34, train_losses_resnet34 = [], []
val_accs_resnet34, val_losses_resnet34 = [], []

for epoch in range(MAX_EPOCH):
    print(f'Epoch {epoch+1}/{MAX_EPOCH}')
    
    # Training phase
    resnet34.train()
    running_loss = 0.0
    running_corrects = 0
    total = 0
    
    for inputs, labels in torch.utils.data.train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = resnet34(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        running_corrects += torch.sum(preds == labels.data)
        total += labels.size(0)
        
    epoch_loss = running_loss / total
    epoch_acc = running_corrects.double() / total
    train_losses_resnet34.append(epoch_loss)
    train_accs_resnet34.append(epoch_acc.item())
    
    # Validation phase
    resnet34.eval()
    val_running_loss = 0.0
    val_running_corrects = 0
    val_total = 0
    
    with torch.no_grad():
        for inputs, labels in torch.utils.data.val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outputs = resnet34(inputs)
            loss = criterion(outputs, labels)
            
            val_running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            val_running_corrects += torch.sum(preds == labels.data)
            val_total += labels.size(0)
    
    val_loss = val_running_loss / val_total
    val_acc = val_running_corrects.double() / val_total
    val_losses_resnet34.append(val_loss)
    val_accs_resnet34.append(val_acc.item())
    
    print(f'Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} | Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}')
    
    # Save best model weights
    if val_acc > best_acc:
        best_acc = val_acc
        best_model_wts = copy.deepcopy(resnet34.state_dict())
        torch.save(best_model_wts, 'resnet34_checkpoint.pkl')

# Load best model weights after training
resnet34.load_state_dict(best_model_wts)

Epoch 1/5


AttributeError: module 'torch.utils.data' has no attribute 'train_loader'

In [None]:
# TODO: 5. Plot the loss and accuracy curves.


# Step 3 Train the student model (ResNet18)
1. Set hyperparameters for the distillation process.
2. Create a NON-PRETRAINED ResNet18 and move the model to selected device.
3. Load the saved ResNet34 and move the model to selected device.
4. Define the optimizer, loss function, etc.
5. Train ResNet18 and save the best-performing parameters to 'resnet18_checkpoint.pkl'.
6. Plot the loss and accuracy curves.

In [None]:
# Verify data shapes

num_classes = max(labels_train_index) + 1
print(num_classes)
print(tensor_train.shape)
print(tensor_val.shape)
print(tensor_test.shape)
print(labels_train_index.shape)
print(labels_val_index.shape)

In [None]:
# TODO: 1. Set hyperparameters for the distillation process.

MAX_EPOCH =
INIT_LR =
BATCH_SIZE =
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

TEMPERATURE = 
LOSS_RATIO = 

In [None]:
# TODO: 2. Create a NON-PRETRAINED ResNet18 and move the model to selected device.

resnet18 = 

In [None]:
# TODO: 3. Load the saved ResNet34 and move the model to selected device.
# Hint: Enable the evaluation mode to prevent updating the parameters.

resnet34 = 

In [None]:
# TODO: 4. Define the optimizer, loss function, etc.

optimizer = 
scheduler = 

In [None]:
# TODO: 5. Train ResNet18 and save the best-performing parameters to 'resnet18_checkpoint.pkl'.
# Hint: Use the correct loss function for knowledge distillation.
#       Details can be fund in https://docs.pytorch.org/tutorials/beginner/knowledge_distillation_tutorial.html.

train_accs_resnet18, train_losses_resnet18, val_accs_resnet18, val_losses_resnet18 = [], [], [], []


In [None]:
# TODO: 6. Plot the loss and accuracy curves.



# Step 4 Predict labels in the testing set
1. Load the saved ResNet18 and move it to the selected device.
2. Do prediction using the images in the testing set.
3. Write the results into the CSV file for submission.

In [None]:
# TODO: 1. Load the saved ResNet18 and move it to the selected device.
# Hint: Enable the evaluation mode to prevent updating the parameters.

resnet18 = 


In [None]:
# TODO: 2. Do prediction using the images in the testing set.
predictions = []


In [None]:
# TODO: 3. Write the results into the CSV file for submission.
# Hint: The CSV file should be in the format of 'file_name' and 'label'.
#       The submission.csv should look like:
#           file_name,label
#           0.jpg,sunglasses
#           ...

index_to_label = 
