In [None]:
import os
from collections import Counter

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from torch.utils.data import DataLoader, random_split
from torchinfo import summary
from torchvision import datasets, transforms
from tqdm.notebook import tqdm

from training import predict, train

# Make runs repeatable: seed PyTorch's global random number generator (used for
# weight initialisation, dropout and DataLoader shuffling) and ask cuDNN to
# pick deterministic algorithms.
torch.manual_seed(42)
torch.backends.cudnn.deterministic = True

Task 1.5.1:

Get the GPU device, if it is available. Store the device name in the variable device.

In [None]:
# Prefer a CUDA GPU, then the MPS backend, and fall back to the CPU when
# neither accelerator is available.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "mps" if torch.backends.mps.is_available() else "cpu"

Load the Dataset
The data for this assignment is contained in the directory sea_creatures. In this directory, there are two folders. We'll use train to train our classification model, and then submit predictions about the test images to the grader.

In [None]:
# Show the top-level entries of the data directory.
os.listdir("sea_creatures")

Task 1.5.2:

Find the names of the classes we will be working with. Images for each class are inside folders within the sea_creatures/train folder. Make a list of the class names (each corresponding to a directory name). Your list should be named classes.

In [None]:
# Each class lives in its own sub-directory of the training folder.
train_dir = os.path.join("sea_creatures", "train")

# Keep directories only (stray files are not classes) and sort the names so the
# list position of each class matches the label index that ImageFolder assigns
# to it (ImageFolder numbers the sorted directory names).
classes = sorted(entry.name for entry in os.scandir(train_dir) if entry.is_dir())
print(classes)

Task 1.5.3:

Build a transformer pipeline. It should ensure the images are in RGB format, scale them to $224 \times 224$ pixels, and convert them into a PyTorch tensor. You will probably find ConvertToRGB useful.

In [None]:
# Target image resolution, in pixels.
height = 224
width = 224


class ConvertToRGB:
    """Callable transform that guarantees an image is in RGB mode.

    Images whose ``mode`` is already ``"RGB"`` are returned unchanged (the very
    same object); any other mode is converted with ``img.convert("RGB")``.
    """

    def __call__(self, img):
        return img if img.mode == "RGB" else img.convert("RGB")


# Preprocessing pipeline: force RGB mode, resize to the target resolution,
# then convert the image into a tensor.
transform = transforms.Compose(
    [
        ConvertToRGB(),
        transforms.Resize((height, width)),
        transforms.ToTensor(),
    ]
)

print(transform)

Task 1.5.4:

Test that the transformer pipeline is working. Load in the specified training image and transform it. Check that you get a $3 \times 224 \times 224$ tensor.

In [None]:
sample_file = "sea_creatures/train/Dolphin/10004986625_0f786ab86b_b.jpg"

# Read the sample picture from disk and run it through the pipeline.
image = Image.open(sample_file)
transformed_image = transform(image)

print(transformed_image.shape)

In [None]:
Task 1.5.5:

Create a DataSet for the training data (using the ImageFolder subclass). It should apply the transformer pipeline.

In [None]:
# Training images, with `transform` applied whenever a sample is fetched.
dataset = datasets.ImageFolder(train_dir, transform=transform)

# Look at the first (image, label) pair as a sanity check.
first_image, first_label = dataset[0]
print("Image size", first_image.shape)
print("Label", first_label)

Task 1.5.6:

Calculate the class distribution. Store this in the variable class_distribution as a dictionary. The keys should be the class names. The values should be the number of training samples for the class.

In [None]:
# Step 1: Get counts per class index. ImageFolder keeps the label of every
# sample in `dataset.targets`, so the labels can be counted directly instead of
# loading and transforming each image just to read its label.
counts = Counter(dataset.targets)
print("The counts dictionary:", counts)

# Step 2: Get class-to-index mapping
class_to_index = dataset.class_to_idx
print("The class_to_idx dictionary:", class_to_index)

# Step 3: Map class names to their counts
class_distribution = {
    class_name: counts[idx] for class_name, idx in class_to_index.items()
}
print("Class distribution:", class_distribution)

Task 1.5.7:

Create a DataLoader that loads from this DataSet in batches of 32.

In [None]:
batch_size = 32
dataset_loader = DataLoader(dataset, batch_size=batch_size)

# Pull a single batch to inspect the shapes the loader yields.
first_batch = next(iter(dataset_loader))
batch_images, batch_labels = first_batch[0], first_batch[1]

print(f"Shape of one batch: {batch_images.shape}")
print(f"Shape of labels: {batch_labels.shape}")

Task 1.5.8: Calculate the mean and standard deviation of each channel in this data set.

Fill the missing lines in the get_mean_std function and invoke it with the right dataset.

This will calculate the correct values for mean and std.

In [None]:
def get_mean_std(loader):
    """Computes the mean and standard deviation of image data.

    Input: a `DataLoader` producing tensors of shape [batch_size, channels, pixels_x, pixels_y]
    Output: the mean of each channel as a tensor, the standard deviation of each channel as a tensor
            formatted as a tuple (means[channels], std[channels])

    Every batch is weighted by the number of samples it holds, so a smaller
    final batch does not skew the statistics.

    Raises: ValueError if the loader yields no samples."""

    channels_sum, channels_squared_sum, num_samples = 0, 0, 0
    for data, _ in tqdm(loader):
        batch_samples = data.size(0)
        # Per-batch means scaled by the batch size; dividing by the total
        # sample count afterwards gives the mean over the whole data set.
        channels_sum += torch.mean(data, dim=[0, 2, 3]) * batch_samples
        channels_squared_sum += torch.mean(data**2, dim=[0, 2, 3]) * batch_samples
        num_samples += batch_samples

    if num_samples == 0:
        raise ValueError("Cannot compute mean and std: the loader produced no samples.")

    # Compute the mean from the channels_sum and the number of samples
    mean = channels_sum / num_samples
    # Compute the standard deviation from channels_squared_sum, the number of
    # samples, and the mean: Var[x] = E[x^2] - E[x]^2. Round-off can push the
    # variance of (nearly) constant data slightly below zero, so clamp it
    # before taking the square root to avoid NaN.
    variance = channels_squared_sum / num_samples - mean**2
    std = torch.clamp(variance, min=0) ** 0.5

    return mean, std


# Per-channel statistics of the training images as produced by `transform`
# (i.e. before any normalization is applied).
mean, std = get_mean_std(dataset_loader)

print(f"Mean: {mean}")
print(f"Standard deviation: {std}")

Task 1.5.9:

Build a new transformer pipeline that normalizes the channels according to the mean and standard deviation above. The pipeline should be assigned to the variable transform_norm. Afterwards, use the pipeline to create a normalized data set and store it in the variable norm_dataset.

In [None]:
# Same steps as `transform`, followed by per-channel normalization with the
# statistics computed above.
normalizing_steps = [
    ConvertToRGB(),
    transforms.Resize((height, width)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
]
transform_norm = transforms.Compose(normalizing_steps)
print(transform_norm)

In [None]:
# The training images again, this time passed through the normalizing pipeline.
norm_dataset = datasets.ImageFolder(train_dir, transform=transform_norm)

# Look at the first (image, label) pair as a sanity check.
norm_image, norm_label = norm_dataset[0]
print("Image size", norm_image.shape)
print("Label", norm_label)

Task 1.5.10:

Split the normalized data set into a training set and a validation set. 80% of the data should be in the training set, and 20% in the validation set.

In [None]:
# A dedicated generator with a fixed seed makes the split repeatable.
g = torch.Generator().manual_seed(42)

# 80% of the samples go to training, the remaining 20% to validation.
split_fractions = [0.8, 0.2]
train_dataset, val_dataset = random_split(norm_dataset, split_fractions, generator=g)

print("Training data set size:", len(train_dataset))
print("Validation data set size:", len(val_dataset))

Task 1.5.11:

Set up data loaders for both the training and validation data sets. Use the same batch size as before. Remember to set shuffle=True on the training loader.

In [None]:
# Shuffle only the training batches. The validation order stays fixed so that
# predictions made from `val_loader` line up with labels read from it later.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

Task 1.5.12:

Start setting up the network. We'll begin with three layers:

- 2D convolution with sixteen $3 \times 3$ kernels
- ReLU activation
- Max pooling with $4 \times 4$ kernels (and a stride of $4$)

Task 1.5.13: Add three more layers to the network.

- 2D convolution with thirty-two $3 \times 3$ kernels
- ReLU activation
- Max pooling with $4 \times 4$ kernels

In [None]:
import torch
import torch.nn as nn


# First stage of the network: a 3x3 convolution producing 16 feature maps,
# a ReLU, then 4x4 max pooling with a stride of 4.
model = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(4, stride=4),
)




In [None]:
# Summarize the model built so far for one batch-sized input.
summary(model, input_size=(batch_size, 3, height, width))

In [None]:
# Rebuild the network with all three convolutional stages followed by a
# flatten layer. For a [32, 3, 224, 224] batch the shapes evolve as
#   stage 1: conv -> [32, 16, 224, 224], pool -> [32, 16, 56, 56]
#   stage 2: conv -> [32, 32, 56, 56],   pool -> [32, 32, 14, 14]
#   stage 3: conv -> [32, 64, 14, 14],   pool -> [32, 64, 3, 3]
#   flatten:      -> [32, 64 * 3 * 3] = [32, 576]
conv_stages = [(3, 16), (16, 32), (32, 64)]

layers = []
for in_ch, out_ch in conv_stages:
    layers.extend(
        [
            nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=4, stride=4),
        ]
    )
layers.append(nn.Flatten())

model = nn.Sequential(*layers)



Task 1.5.15: Add the final layers to the model

- Drop-out (with $p = 0.5$)
- Linear layer with $500$ outputs (check the summary above to get the correct number of inputs)
- ReLU activation
- Drop-out
- Linear output layer with the appropriate number of outputs

In [None]:
def _conv_stage(in_channels, out_channels):
    """Return the layers of one stage: 3x3 conv (padding 1), ReLU, 4x4 max pool (stride 4)."""
    return [
        nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
        ),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=4, stride=4),
    ]


# Complete network. For a [32, 3, 224, 224] batch:
#   three conv stages -> [32, 16, 56, 56] -> [32, 32, 14, 14] -> [32, 64, 3, 3]
#   flatten           -> [32, 64 * 3 * 3] = [32, 576]
#   dense head        -> [32, 500] -> [32, 9] (one score per output class)
model = nn.Sequential(
    # Convolutional feature extractor
    *_conv_stage(3, 16),
    *_conv_stage(16, 32),
    *_conv_stage(32, 64),
    # Flatten the features
    nn.Flatten(),
    # Fully connected classifier with drop-out before each linear layer
    nn.Dropout(p=0.5),
    nn.Linear(in_features=576, out_features=500),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=500, out_features=9),
)



In [None]:
# Summarize the complete model for one batch-sized input.
summary(model, input_size=(batch_size, 3, height, width))

Task 1.5.16:

Prepare for training. Define the loss function, create an optimizer, and send the model to the GPU device.

In [None]:
# Cross-entropy loss for the multi-class classification problem.
loss_fn = nn.CrossEntropyLoss()
# Adam optimizer over all model parameters with a learning rate of 0.001.
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Move the model to the device selected earlier (GPU when available).
model.to(device)

Task 1.5.17:

Train the model for $10$ epochs.

In [None]:

# Number of epochs to train for.
epochs = 10
# Train the model for 10 epochs. `train` is the course helper; presumably it
# also evaluates on `val_loader` during training — confirm in training.py.
train(model, optimizer, loss_fn, train_loader, val_loader, epochs, device)

Evaluate the Model Performance






Task 1.5.18:

Make predictions for all of the images in the validation set. Start by calculating the probabilities using the predict function (from our training.py module). Then calculate the predicted class based on the probabilities.

In [None]:
# Predict the class probabilities for every image in the validation set.
probabilities = predict(model, val_loader, device)

# The predicted class of an image is the index of its largest probability.
predictions = torch.argmax(probabilities, dim=1)

print("Number of predictions:", predictions.shape)

Task 1.5.19:

Create the confusion matrix for the predictions on the validation set. We have provided the actual classes in the targets variable.

In [None]:
# Collect the true label of every validation image, batch by batch, in the
# order the (unshuffled) validation loader yields them.
targets = [label for _, labels in tqdm(val_loader) for label in labels.tolist()]

In [None]:
# Don't change this
fig, ax = plt.subplots(figsize=(10, 6))

# Compare the true labels with the predicted ones (moved to the CPU first).
cm = confusion_matrix(targets, predictions.cpu())

# Get the class names as a list. The keys of `class_distribution` were inserted
# in the order of `dataset.class_to_idx`, so they follow the label indices.
# Using a list (rather than the dictionary itself) keeps `classes` a plain
# sequence of names.
classes = list(class_distribution)

# Display the confusion matrix (don't change this)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)
disp.plot(cmap=plt.cm.Blues, xticks_rotation="vertical", ax=ax)

Task 1.5.20:

Create a data set for the test data. It is located in the sea_creatures/test directory. Then create a data loader from this data set. DO NOT shuffle this data!

In [None]:
test_dir = os.path.join("sea_creatures", "test")

# Use the normalizing pipeline: the model was trained on images normalized with
# `transform_norm`, so the test images must be preprocessed the same way.
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform_norm)

print("Number of test images:", len(test_dataset))

# Same batch size as before; never shuffle, so that the i-th prediction
# corresponds to the i-th entry of `test_dataset.samples`.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Task 1.5.21:

Make a prediction for each of the test images.



In [None]:
# Predict the probabilities for each test image (from the test loader, not the
# validation loader).
test_probabilities = predict(model, test_loader, device)

# Get the index associated with the largest probability for each test image
test_predictions = torch.argmax(test_probabilities, dim=1)

print("Number of predictions:", test_predictions.shape)

Final checks
You can now check how accurate your model is by sampling some images from our /test directory. These images are not labeled, so you'll need to check them manually.

The code below randomly samples 12 images from the test directory and shows them in a grid alongside its predicted label. Run it as many times as you want to get different samples.

How is it working?

In [None]:
import matplotlib.pyplot as plt
import random

# Translate every predicted class index into its class name. The model was
# trained on `norm_dataset`, whose `classes` list is ordered by label index.
test_classes = [norm_dataset.classes[i] for i in test_predictions.tolist()]

# Sample up to 12 random indices from the test dataset (fewer if the test set
# itself holds fewer than 12 images).
num_test_images = len(test_loader.dataset.samples)
sample_indices = random.sample(range(num_test_images), min(12, num_test_images))

# Create a grid of 4x3 subplots
fig, axes = plt.subplots(4, 3, figsize=(20, 10))

# Hide every axis up front so unused grid cells stay blank
for ax in axes.flatten():
    ax.axis('off')

# Iterate over the sampled indices and plot the corresponding images
for ax, idx in zip(axes.flatten(), sample_indices):
    image_path = test_loader.dataset.samples[idx][0]

    # Display the image on the axis; the file is closed once it has been drawn
    with Image.open(image_path) as img:
        ax.imshow(img)

    # Get the predicted class for this image
    predicted_class = test_classes[idx]

    # Set the title of the subplot to the predicted class
    ax.set_title(f"Predicted: {predicted_class}", fontsize=14)

plt.tight_layout()