In [None]:
! pip install -r requirements.txt

In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
import matplotlib.pyplot as plt


# Preprocessing applied to every sample: resize to 128x128, then convert to a tensor.
resize_step = transforms.Resize((128, 128))
to_tensor_step = transforms.ToTensor()
transform = transforms.Compose([resize_step, to_tensor_step])

# Training split of Fashion-MNIST, stored under ./data (download is enabled).
dataset = datasets.FashionMNIST(
    root='data',
    train=True,
    download=True,
    transform=transform,
)


# Fashion MNIST
Fashion-MNIST is a dataset of fashion images for practicing computer vision. It contains 60,000 training images and 10,000 test images of clothing items, such as shirts, shoes, and bags.

In [None]:
# Label names exposed by the dataset; integer labels are used to index into this list.
class_names = dataset.classes
label_prefix = "The class names are: "
print(label_prefix, class_names)

Let's look at a picture from the dataset.

In [None]:
# Grab the first (image, label) sample from the dataset.
first_image, first_label = dataset[0]
# Move the leading channel axis to the end so matplotlib can draw the image.
img = first_image.permute(1, 2, 0).numpy()
print(f"This is a {class_names[first_label]}")
print(f"The shape of the image is {img.shape}")
plt.imshow(img)

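Let's see what pooling does. Max pooling slides a small window over the image and keeps only the largest value in each window, so the output is smaller than the input.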

In [None]:
# First image tensor with a leading batch axis added.
img = dataset[0][0][None, ...]
# 2x2 max pooling applied every 4 pixels: because the stride is larger than the
# window, there is a 2-pixel gap between windows and each spatial side shrinks
# by roughly a factor of 4.
pool = torch.nn.MaxPool2d(2, stride=4)
pooled_img = pool(img)
# Drop the batch axis and move channels last for plotting.
plt.imshow(pooled_img[0].permute(1, 2, 0).numpy())


This can be done multiple times. You can see how the image gets smaller and smaller.

In [None]:
# Feed the already-pooled image through the same pooling layer a second time.
dobbel_pooled = pool(pooled_img)
# Drop the batch axis and move channels last for plotting.
plt.imshow(dobbel_pooled[0].permute(1, 2, 0).numpy())

# CNN Filters
Let's look at the filters in a convolutional neural network. Here is an example picture of a cat:

In [None]:
# Read the example picture from disk into an array and display it.
image_path = "data/cat.png"
image = plt.imread(fname=image_path)
plt.imshow(image)

If we apply a filter to the image, we can see how it highlights certain features. For example, a filter that detects edges will highlight the edges in the image.

In [None]:
# Edge-detection kernel with shape (1, 1, 3, 3): the centre pixel is weighted +8 and
# its eight neighbours -1, so flat regions sum to zero and intensity changes stand out.
# Named `edge_kernel` rather than `filter` so the Python builtin `filter` is not shadowed.
edge_kernel = torch.tensor([[[[-1, -1, -1],
                              [-1,  8, -1],
                              [-1, -1, -1]]]], dtype=torch.float32)
conv_layer = torch.nn.Conv2d(in_channels=3, out_channels=1, kernel_size=3, padding=1, bias=False)
relu_layer = torch.nn.ReLU()
# Replace the randomly initialised weights with the fixed kernel, repeated for each of
# the 3 input channels.
conv_layer.weight = torch.nn.Parameter(edge_kernel.repeat(1, 3, 1, 1))

# PNG files can carry an alpha channel (H, W, 4); keep only the first three (RGB)
# channels so the input matches in_channels=3. An RGB image is left unchanged.
rgb_image = image[..., :3]
# (H, W, C) -> (1, C, H, W): channels first, plus a batch dimension, as float32.
image_tensor = torch.tensor(rgb_image).permute(2, 0, 1).unsqueeze(0).float()
# Convolve, then clamp negative responses to zero.
filtered_image = relu_layer(conv_layer(image_tensor))

# Drop the batch axis, move the single output channel last, and show as grayscale.
plt.imshow(filtered_image.squeeze(0).permute(1, 2, 0).detach().numpy(), cmap='gray')

We can see how another filter might affect the image.

In [None]:
# Prewitt-style kernel: +1 in the left column, -1 in the right column, so it responds to
# left-to-right intensity changes (vertical edges). Named `vertical_edge_kernel` rather
# than `filter` so the Python builtin `filter` is not shadowed.
vertical_edge_kernel = torch.tensor([[[[1, 0, -1],
                                       [1, 0, -1],
                                       [1, 0, -1]]]], dtype=torch.float32)
# Swap the new kernel into the existing conv layer, repeated for the 3 input channels.
conv_layer.weight = torch.nn.Parameter(vertical_edge_kernel.repeat(1, 3, 1, 1))
# Convolve, then clamp negative responses to zero (only left-brighter-than-right
# transitions remain visible).
filtered_image = relu_layer(conv_layer(image_tensor))
# Drop the batch axis, move the single output channel last, and show as grayscale.
plt.imshow(filtered_image.squeeze(0).permute(1, 2, 0).detach().numpy(), cmap='gray')

# Create a model


In [None]:
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
import torch

class_names = dataset.classes

# Small CNN for 1-channel 128x128 inputs. Each stride-2 pooling layer halves the
# height and width (128 -> 64 -> 32), so after the second conv/pool stage there are
# 32 channels of 32x32, which is the 32 * 32 * 32 input size of the first Linear layer.
# NOTE(review): there are no activation functions between the layers — confirm
# that this is intentional.
layers = [
    Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
    MaxPool2d(kernel_size=2, stride=2),
    Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
    MaxPool2d(kernel_size=2, stride=2),
    Flatten(),
    Linear(32 * 32 * 32, 128),
    Linear(128, 10),
]
model = Sequential(*layers)

# Smoke-test the untrained model on the first image of the dataset.
img = dataset[0][0].unsqueeze(0)  # add a leading batch axis
output = model(img)
# Index of the highest score along the class dimension.
predicted_class = output.argmax(dim=1).item()
print(f"The model predicts this image as a {class_names[predicted_class]}")