# Exercise 3.7

Consider the MNIST dataset. Use a classification algorithm of your choice and compare accuracy based on the original images and on the images obtained by PCA dimension reduction.

## Preamble

I decided to use a pre-trained CNN to analyze how a model with an inductive bias for the task (image recognition) is affected by dimensionality reduction.

We use PyTorch to load and fine-tune ResNet50 pre-trained on ImageNet.

In [None]:
import numpy as np

from PIL import Image

import torch                              # PyTorch library
import torch.nn    as nn                  # neural-network
import torch.optim as optim               # optimizers
from torch.utils.data import DataLoader   # batch and shuffle dataset
from torch.utils.data import Dataset      # import Dataset class
from torchvision      import datasets, transforms, models
# `datasets.MNIST`  for the MNIST dataset
# `transforms`      for preprocessing images
# `models.resnet50` for the pretrained ResNet50 architecture

from sklearn.decomposition import PCA

## Setup

Check whether a GPU is available (otherwise fall back to the CPU). \
All computations are then moved to the selected device.

**Important**: on Colab you need to select the GPU from `Runtime > Change runtime type`.

In [None]:
# Report the CUDA status, then select the device used for every later
# computation: the GPU when one is available, the CPU otherwise.
use_cuda = torch.cuda.is_available()

print("CUDA available:\t",  use_cuda)
print("CUDA devices:\t",    torch.cuda.device_count())
if use_cuda:
  print("Current GPU:\t", torch.cuda.get_device_name(0))

device = torch.device("cuda") if use_cuda else torch.device("cpu")

CUDA available:	 True
CUDA devices:	 1
Current GPU:	 Tesla T4


MNIST images are 28x28 grayscale, while ImageNet ones are 224x224 RGB. \
We need to resize the images and replicate the channels, in order to adapt the ResNet architecture to the MNIST dataset.

In [None]:
# Preprocessing pipeline applied, in order, to every MNIST image so that it
# matches the input format of the ImageNet-pretrained ResNet.
transform = transforms.Compose([
    # 1. upscale the 28x28 digit to 224x224
    transforms.Resize(size=224),
    # 2. copy the single gray channel into 3 channels
    transforms.Grayscale(num_output_channels=3),
    # 3. 0-255 image -> FloatTensor of shape (3,224,224) with values in [0,1]
    transforms.ToTensor(),
    # 4. channel-wise normalization with ImageNet's statistics
    transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    ),
])

Load the MNIST dataset. \
Each image is automatically passed through the `transform` pipeline.

In [None]:
# MNIST train / test splits (downloaded into ./data);
# each image goes through `transform` when it is accessed.
train_ds = datasets.MNIST('./data', train=True,  transform=transform, download=True)
test_ds  = datasets.MNIST('./data', train=False, transform=transform, download=True)

# Mini-batches of 64 images:
#   the training order is reshuffled at every epoch, the test order is fixed.
train_loader = DataLoader(dataset=train_ds, batch_size=64, shuffle=True)
test_loader  = DataLoader(dataset=test_ds,  batch_size=64, shuffle=False)

100%|██████████| 9.91M/9.91M [00:00<00:00, 17.7MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 480kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 3.85MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.09MB/s]


Load the pre-trained ResNet50.

In [None]:
# Load ResNet50 with the ImageNet-1k V1 weights (the checkpoint that
# `pretrained=True` used to select). `pretrained=` is deprecated since
# torchvision 0.13 in favour of the explicit `weights=` argument.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 129MB/s]


The final fully-connected layer of ResNet outputs 1000 logits, but MNIST has only 10 classes (one per digit). \
So we substitute it with a new layer (with randomly initialized weights).

In [None]:
# Replace the 1000-way ImageNet head with a new, randomly initialized
# 10-way head that takes the same number of input features.
n_features = model.fc.in_features
model.fc = nn.Linear(in_features=n_features, out_features=10)

Freeze parameters: we want to train only the final fully-connected layer.

In [None]:
# Turn gradients off for the whole network, then back on for the final
# fully-connected layer only: that is the single part we want to train.
model.requires_grad_(False)
model.fc.requires_grad_(True)


# Sanity check: names of the parameters that still receive gradients.
trainable = [name for name, weight in model.named_parameters() if weight.requires_grad]
print("Trainable layers:\n", trainable)

Trainable layers:
 ['fc.weight', 'fc.bias']


Define loss criterion and optimizer.

In [None]:
# Cross-entropy between the 10 output logits and the digit label.
criterion = nn.CrossEntropyLoss()

# Only the replaced head is trained, so the optimizer is given just the head's
# parameters rather than the whole network.
optimizer = optim.Adam(model.fc.parameters(), lr=1e-4)

Move all model parameters to the selected device.

In [None]:
# Move the model to `device` (the GPU when available, otherwise the CPU).
model = model.to(device)

## Execution

Define functions to train the classifier and evaluate on test set.

In [None]:
def train_one_epoch(model, loader, criterion, optimizer, device):
  """Train `model` for one pass over `loader` and return the mean loss.

  Args:
    model:     network to optimize; it is switched to training mode.
    loader:    iterable yielding `(inputs, labels)` batches.
    criterion: loss called as `criterion(outputs, labels)`; expected to
               return the mean loss over the batch.
    optimizer: optimizer stepped once per batch.
    device:    device the batches are moved to before the forward pass.

  Returns:
    Mean loss per example over the examples actually processed, or 0.0 when
    the loader yields no batch.
  """
  model.train()           # enables dropout and batch-norm in training mode
  running_loss = 0.0
  n_seen = 0              # examples really processed in this epoch

  for imgs, labels in loader:
    imgs, labels = imgs.to(device), labels.to(device)   # move data to device

    optimizer.zero_grad()               # zero out gradients from the previous step
    outputs = model(imgs)               # forward pass
    loss = criterion(outputs, labels)   # compute loss
    loss.backward()                     # backward pass (backpropagation)
    optimizer.step()                    # update weights

    # loss.item() is the batch mean, so weight it by the batch size to
    # accumulate the total loss over all examples seen so far
    batch_size = imgs.size(0)
    running_loss += loss.item() * batch_size
    n_seen += batch_size

  # Divide by the examples seen, not len(loader.dataset): the two differ when
  # the loader skips samples (e.g. drop_last=True or a subset sampler).
  return running_loss / n_seen if n_seen else 0.0


def evaluate(model, loader, device):
  """Return the classification accuracy of `model` on the data in `loader`.

  Args:
    model:  network to evaluate; it is switched to evaluation mode.
    loader: iterable yielding `(inputs, labels)` batches.
    device: device the batches are moved to before the forward pass.

  Returns:
    Fraction of correctly classified examples among those yielded by
    `loader`, or 0.0 when the loader yields no batch.
  """
  model.eval()    # switches off dropout and uses running statistics for batch-norm
  correct = 0
  total = 0

  with torch.no_grad():   # disables gradient tracking to save memory
    # Iterate the loader passed in, not a global one, so that the function
    # really scores the dataset the caller asked for.
    for imgs, labels in loader:
      imgs, labels = imgs.to(device), labels.to(device)

      preds = model(imgs).argmax(dim=1)           # forward pass + picks the predicted class
      correct += (preds == labels).sum().item()   # count correct predictions
      total += labels.size(0)                     # count evaluated examples

  return correct / total if total else 0.0

Run multiple epochs. Each epoch is a train + validation step.

**Warning**: this cell takes _a lot_ of time to run (~5 min per epoch).

In [None]:
# Fine-tune for a fixed number of epochs; after each one, measure the
# accuracy on the test set and print a one-line summary.
n_epochs = 3
for epoch in range(1, n_epochs+1):
  train_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
  test_acc   = evaluate(model, test_loader, device)

  print(f"Epoch {epoch:>2}  --  train loss: {train_loss:.4f}  --  test acc: {test_acc:.4f}")

Epoch  1  --  train loss: 1.0925  --  test acc: 0.8831
Epoch  2  --  train loss: 0.5262  --  test acc: 0.9026
Epoch  3  --  train loss: 0.4035  --  test acc: 0.9147


## Dimension reduction on test set

Now we apply dimension reduction to the test set and see if the fine-tuned classifier can still recognize the images.

Load the raw dataset and apply PCA. As usual, PCA is fitted on the training set only, so that no test-set information leaks into the projection and inflates the accuracy estimate.

In [None]:
# Reload MNIST without transforms (download=False: the files are already in ./data).
train_ds_raw = datasets.MNIST(root='./data', download=False, train=True,  transform=None)
test_ds_raw  = datasets.MNIST(root='./data', download=False, train=False, transform=None)

# Flatten the raw training images into one row of pixels per image.
# Reading the `.data` tensor directly gives the same pixel values as looping
# over the dataset, without building one PIL image per sample.
X_train = train_ds_raw.data.numpy().reshape(len(train_ds_raw), -1)

# Fit PCA on the training set only, keeping as many components as needed
# to explain 95% of the variance.
pca = PCA(n_components=0.95).fit(X_train)

Create a `Dataset` class for the reduced dataset (it is the interface expected by PyTorch's `DataLoader`).

In [None]:
class PCA_MNIST(Dataset):
  """Dataset whose images are PCA reconstructions of the raw images.

  Each item is obtained by projecting the raw image onto the fitted PCA
  components, mapping it back to pixel space and applying `transform`.
  """

  def __init__(self, raw_dataset, pca, transform):
    """Store the raw data and the objects needed to rebuild each image.

    Args:
      raw_dataset: dataset exposing `.data` (indexable images) and
                   `.targets` (indexable labels).
      pca:         fitted object providing `transform`, `inverse_transform`
                   and `mean_`.
      transform:   callable applied to the reconstructed PIL image.
    """
    self.data       = raw_dataset.data
    self.targets    = raw_dataset.targets
    self.pca        = pca
    self.transform  = transform
    self.mean_      = pca.mean_    # kept as a public attribute; not read by this class

  def __len__(self):
    """Return the number of images in the underlying raw dataset."""
    return len(self.data)

  def __getitem__(self, idx):
    """Return `(image, label)` for the PCA-reconstructed image at `idx`."""
    # get raw image and label
    #   self.data[idx] : torch.Tensor of shape (28,28)
    arr = np.array(self.data[idx])    # convert to NumPy array
    label = int(self.targets[idx])

    # flatten & reconstruct via PCA
    x = arr.ravel().astype(float)             # shape (784,)
    z = self.pca.transform(x[np.newaxis,:])   # shape (1, k)
    x_rec = self.pca.inverse_transform(z)[0]  # shape (784,)

    # turn vector back into a PIL image
    #   The reconstruction can leave the valid pixel range, hence the clip.
    #   No explicit `mode` is passed: that argument is deprecated in recent
    #   Pillow releases and produced one warning per sample, while a 2-D
    #   uint8 array is already read as an 8-bit grayscale ("L") image.
    img28 = x_rec.reshape(28,28).clip(0,255).astype(np.uint8)
    pil   = Image.fromarray(img28)

    # apply ResNet transform
    img   = self.transform(pil)               # tensor (3,224,224)
    return img, label

Now we can recycle the same code as above.

In [None]:
# PCA-reconstructed version of the test set, served in fixed-order batches of 64.
pca_test_ds     = PCA_MNIST(raw_dataset=test_ds_raw, pca=pca, transform=transform)
pca_test_loader = DataLoader(dataset=pca_test_ds, batch_size=64, shuffle=False)

In [None]:
# Score the fine-tuned model using the loader of PCA-reconstructed test images.
test_acc_pca = evaluate(model=model, loader=pca_test_loader, device=device)
print(f"Accuracy on PCA‑reconstructed test set: {test_acc_pca:.4f}")

Accuracy on PCA‑reconstructed test set: 0.9147


## Dimension reduction on training set

We can also fine-tune on the reduced training set and evaluate the performance on the original test set.

Apply PCA to the training set.

In [None]:
# PCA-reconstructed version of the training set, reshuffled at every epoch.
pca_train_ds     = PCA_MNIST(raw_dataset=train_ds_raw, pca=pca, transform=transform)
pca_train_loader = DataLoader(dataset=pca_train_ds, batch_size=64, shuffle=True)

Instantiate a new model to fit.

In [None]:
# Fresh ResNet50 with the ImageNet-1k V1 weights (`weights=` replaces the
# `pretrained=True` argument, deprecated since torchvision 0.13).
model_pca = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Replace the 1000-way head with a new 10-way head.
n_features = model_pca.fc.in_features
model_pca.fc = nn.Linear(n_features, 10)

# Freeze the backbone of the NEW model and leave only its head trainable.
# These loops must act on `model_pca`, not on the previously trained `model`.
for param in model_pca.parameters():
    param.requires_grad = False
for param in model_pca.fc.parameters():
    param.requires_grad = True

model_pca = model_pca.to(device)



**Warning**: this cell takes _a lot_ of time to run (~5 min per epoch).

In [None]:
# `model_pca` needs its own optimizer: the existing `optimizer` was built from
# the parameters of `model`, so stepping it never updates `model_pca`.
# Only the new head is trained, hence only its parameters are passed.
optimizer_pca = optim.Adam(model_pca.fc.parameters(), lr=1e-4)

# Fine-tune on the PCA-reconstructed training set and, after each epoch,
# measure the accuracy on the original test set.
n_epochs = 3
for epoch in range(1, n_epochs+1):
  train_loss = train_one_epoch(model_pca, pca_train_loader,
                               criterion, optimizer_pca, device)
  test_acc   = evaluate(model_pca, test_loader,
                        device)

  print(f"Epoch {epoch:>2}  --  train loss: {train_loss:.4f}  --  test acc: {test_acc:.4f}")

[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, 

Epoch  1  --  train loss: 2.3370  --  test acc: 0.1062


[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, 

Epoch  2  --  train loss: 2.3371  --  test acc: 0.1036


[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, mode='L')
  pil   = Image.fromarray(img28, 

Epoch  3  --  train loss: 2.3370  --  test acc: 0.1025


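Warnings aside, performance drops a lot. Note, however, that a training loss stuck at about 2.34 (close to ln 10 ≈ 2.30) and a test accuracy of about 10% are what a 10-class classifier obtains by guessing at random: these numbers suggest that the new model is not learning at all, rather than that training on PCA-reconstructed images is harmful.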