# Week I

More Neural Networks for images

In [None]:
# Download the course helper modules (data_utils.py, image_utils.py) that the import cell below loads.
# NOTE(review): WKI_utils is imported below but is not downloaded here — presumably it ships
# alongside this notebook; confirm before running on a fresh machine.
!wget -q https://github.com/DM-GY-9103-2024F-H/9103-utils/raw/main/src/data_utils.py
!wget -q https://github.com/DM-GY-9103-2024F-H/9103-utils/raw/main/src/image_utils.py

In [None]:
import torch
import matplotlib.pyplot as plt

from torch import nn
from torchvision.transforms import v2

from data_utils import classification_error, display_confusion_matrix
from image_utils import make_image

from WKI_utils import LFWUtils

## Review

Let's quickly review the model from last week.

### Load Data

The version of `LFWUtils.train_test_split()` in this week's utils class has an optional parameter `return_loader` that will return the data already in sensible `DataLoader` objects.

In [None]:
# return_loader=True asks the helper for DataLoader objects instead of raw data.
# 0.3 is the split ratio handed to the helper — presumably the test fraction; confirm in WKI_utils.
train, test = LFWUtils.train_test_split(
  0.3,
  return_loader=True,
)

### Look at Data

Our `DataLoaders` are iterable objects, which means we need to do a bit of unpacking to get to actual labels and pixels.

In [None]:
# Pull one batch out of the training loader, then show its first sample.
img, label = next(iter(train))

first_img, first_label = img[0], label[0]
print(LFWUtils.LABELS[first_label])
display(make_image(first_img, width=130))

### Model, Optimizer, Cost/Loss Function

This is the model from last week.

In [None]:
# Layer sizes: one input per pixel, a hidden layer 1/8 that size, one output per label.
image_length = LFWUtils.IMAGE_SIZE[0] * LFWUtils.IMAGE_SIZE[1]
hidden_length = image_length // 8
num_classes = len(LFWUtils.LABELS)

# Two fully-connected layers, each preceded by dropout.
model = nn.Sequential(
  nn.Dropout(0.2),
  nn.Linear(image_length, hidden_length),
  nn.ReLU(),

  nn.Dropout(0.2),
  nn.Linear(hidden_length, num_classes),
)

# Cost function and plain SGD optimizer over all model parameters.
loss_fn = nn.CrossEntropyLoss()

learning_rate = 1e-5
optim = torch.optim.SGD(model.parameters(), lr=learning_rate)

### Train

In [None]:
# Train for 32 epochs; report on every 4th epoch and once more after the last one.
epochs = range(32)

for e in epochs:
  model.train()

  for x, y in train:
    optim.zero_grad()
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss.backward()
    optim.step()

  # `loss` here is the loss of the last mini-batch seen in this epoch.
  if e % 4 == 0 or e == epochs[-1]:
    print(f"Epoch: {e} loss: {loss.item():.4f}")

### Eval

Could've been in the loop, but we already know this model...

In [None]:
# The training loop leaves the model in train mode, where the Dropout layers randomly
# zero activations. Switch to eval mode explicitly before predicting:
# LFWUtils.get_labels is defined outside this notebook, so don't rely on it to do so.
model.eval()

# Collect true labels and model predictions for both splits.
train_labels, train_predictions = LFWUtils.get_labels(model, train)
test_labels, test_predictions = LFWUtils.get_labels(model, test)

# Compare error on data the model has seen (train) vs. data it has not (test).
train_error = classification_error(train_labels, train_predictions)
test_error = classification_error(test_labels, test_predictions)
print(f"train error: {train_error:.4f}, test error: {test_error:.4f}")

display_confusion_matrix(train_labels, train_predictions, display_labels=LFWUtils.LABELS)
display_confusion_matrix(test_labels, test_predictions, display_labels=LFWUtils.LABELS)

## Make Training Harder Again

### Image augmentation

**TODO:** add an example image showing the augmentations. Reference:

https://pytorch.org/vision/0.13/auto_examples/plot_transforms.html

In [None]:
# Grab a fresh batch; `img` is reused by the transform preview cell further down.
img, label = next(iter(train))

first_img, first_label = img[0], label[0]
print(LFWUtils.LABELS[first_label])
display(make_image(first_img, width=130))

In [None]:
# Random augmentations for training images (original images are 130 x 170).
train_augmentations = [
  v2.RandomHorizontalFlip(p=0.5),
  v2.RandomRotation(degrees=15),
  v2.RandomPerspective(distortion_scale=0.25, p=0.5),
  # Other options to experiment with:
  # v2.RandomResizedCrop(size=(170, 130), scale=(.75, .9), antialias=True),
  # v2.RandomAffine(degrees=15, translate=(0.1, 0.3), scale=(1.1, 1.5))
]

transforms = v2.Compose(train_augmentations)

In [None]:
# Preview the augmentation pipeline on the first image of the current batch.
# Re-run this cell to see a different random result.
source_img = make_image(img[0], width=130)
timg = transforms(source_img)
display(timg)

In [None]:
# Rebuild the loaders, this time handing the augmentation pipeline to the helper.
# Presumably the transform is applied to the training split only — confirm in WKI_utils.
train, test = LFWUtils.train_test_split(
  0.3,
  return_loader=True,
  train_transform=transforms,
)

In [None]:
# Look at the first sample of a batch drawn from the new training loader.
img, label = next(iter(train))

first_img, first_label = img[0], label[0]
print(LFWUtils.LABELS[first_label])
display(make_image(first_img, width=130))

In [None]:
# Same architecture as the review model, re-created so training starts from fresh weights.
image_length = LFWUtils.IMAGE_SIZE[0] * LFWUtils.IMAGE_SIZE[1]
hidden_length = image_length // 8
num_classes = len(LFWUtils.LABELS)

model = nn.Sequential(
  nn.Dropout(0.2),
  nn.Linear(image_length, hidden_length),
  nn.ReLU(),

  nn.Dropout(0.2),
  nn.Linear(hidden_length, num_classes),
)

# Cost function and plain SGD optimizer over the new model's parameters.
loss_fn = nn.CrossEntropyLoss()

learning_rate = 1e-5
optim = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [None]:
# Train for 32 epochs; report on every 4th epoch and once more after the last one.
epochs = range(32)

for e in epochs:
  model.train()

  for x, y in train:
    optim.zero_grad()
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss.backward()
    optim.step()

  # `loss` here is the loss of the last mini-batch seen in this epoch.
  if e % 4 == 0 or e == epochs[-1]:
    print(f"Epoch: {e} loss: {loss.item():.4f}")

In [None]:
# The training loop leaves the model in train mode, where the Dropout layers randomly
# zero activations. Switch to eval mode explicitly before predicting:
# LFWUtils.get_labels is defined outside this notebook, so don't rely on it to do so.
model.eval()

# Collect true labels and model predictions for both splits.
# NOTE(review): if the train loader still applies the random transforms here, the train
# error is measured on augmented images — confirm in WKI_utils.
train_labels, train_predictions = LFWUtils.get_labels(model, train)
test_labels, test_predictions = LFWUtils.get_labels(model, test)

# Compare error on data the model has seen (train) vs. data it has not (test).
train_error = classification_error(train_labels, train_predictions)
test_error = classification_error(test_labels, test_predictions)
print(f"train error: {train_error:.4f}, test error: {test_error:.4f}")

display_confusion_matrix(train_labels, train_predictions, display_labels=LFWUtils.LABELS)
display_confusion_matrix(test_labels, test_predictions, display_labels=LFWUtils.LABELS)

## Cost vs Eval

Cost can go down without changing accuracy.

`true_labels = [0, 1]`

`prediction_probs := [0.1, 0.9], [0.9, 0.1]` $\Rightarrow$
`prediction_labels = [1, 0]`

accuracy: $0$, loss: $\approx 2.30$

`prediction_probs := [0.45, 0.55], [0.51, 0.49]` $\Rightarrow$
`prediction_labels = [1, 0]`

accuracy: $0$, loss: $\approx 0.76$


In [None]:
# Two sets of predictions that are both 100% wrong (same accuracy) but have different cost:
# the second set is far less confident about its wrong answers, so its loss is lower.
nl = nn.CrossEntropyLoss()

# True class index for each of the two samples.
l = torch.Tensor([0, 1]).long()

# Predicted class probabilities; each row sums to 1.
p0 = torch.Tensor([[0.1, 0.9], [0.9, 0.1]])
p1 = torch.Tensor([[0.45, 0.55], [0.51, 0.49]])

# nn.CrossEntropyLoss expects unnormalized logits and applies log-softmax internally.
# Passing probabilities directly would soft-max them a second time and report the wrong cost.
# log(p) is a valid logit vector for p: softmax(log(p)) == p when each row sums to 1.
loss_p0 = nl(p0.log(), l)
loss_p1 = nl(p1.log(), l)

loss_p0, loss_p1

## Convolutions

**TODO:** add an image illustrating convolutions.



In [None]:
import torch
import matplotlib.pyplot as plt

from torch import nn
from torchvision.transforms import v2

from data_utils import classification_error, display_confusion_matrix
from image_utils import make_image

from WKI_utils import LFWUtils

In [None]:
# Random augmentations for training images (original images are 130 x 170).
train_augmentations = [
  v2.RandomHorizontalFlip(p=0.5),
  v2.RandomRotation(degrees=15),
  v2.RandomPerspective(distortion_scale=0.25, p=0.5),
  # Other options to experiment with:
  # v2.RandomResizedCrop(size=(170, 130), scale=(.75, .9), antialias=True),
  # v2.RandomAffine(degrees=15, translate=(0.1, 0.3), scale=(1.1, 1.5))
]

transforms = v2.Compose(train_augmentations)

In [None]:
# Build loaders for the convolutional model, with augmentation passed as the train transform.
# NOTE(review): cnn_loader=True presumably yields image-shaped (channel, height, width) tensors
# rather than flattened pixels — confirm in WKI_utils.
train, test = LFWUtils.train_test_split(
  0.3,
  cnn_loader=True,
  train_transform=transforms,
)

In [None]:
# Draw one batch and render its first image tensor as a PIL image.
img, label = next(iter(train))

to_pil = v2.ToPILImage()
display(to_pil(img[0]))

https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

In [None]:
width, height = LFWUtils.IMAGE_SIZE

# Size of the flattened feature map fed to the first Linear layer:
# the 3x3 convolution (no padding) shrinks each side by 2 and the 2x2 max-pool halves it,
# i.e. n -> (n - 2) // 2 == n // 2 - 1, across the 32 convolution output channels.
pooled_width = (width // 2) - 1
pooled_height = (height // 2) - 1
linear_length = pooled_width * pooled_height * 32

# One convolution + pooling stage, followed by a two-layer classifier.
model = nn.Sequential(
  nn.Dropout(0.2),
  nn.Conv2d(1, 32, 3),
  nn.ReLU(),
  nn.MaxPool2d(2, 2),

  # More Convs ?

  nn.Flatten(1, -1),

  nn.Linear(linear_length, 512),
  nn.ReLU(),

  nn.Linear(512, len(LFWUtils.LABELS)),
)

# Cost function and SGD with momentum over the model's parameters.
loss_fn = nn.CrossEntropyLoss()

learning_rate = 2e-2
optim = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

In [None]:
# Train for 32 epochs; report on every 4th epoch and once more after the last one.
epochs = range(32)

for e in epochs:
  model.train()

  for x, y in train:
    optim.zero_grad()
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss.backward()
    optim.step()

  # `loss` here is the loss of the last mini-batch seen in this epoch.
  if e % 4 == 0 or e == epochs[-1]:
    print(f"Epoch: {e} loss: {loss.item():.4f}")

In [None]:
# The training loop leaves the model in train mode, where the Dropout layer randomly
# zeroes inputs. Switch to eval mode explicitly before predicting:
# LFWUtils.get_labels is defined outside this notebook, so don't rely on it to do so.
model.eval()

# Collect true labels and model predictions for both splits.
# NOTE(review): if the train loader still applies the random transforms here, the train
# error is measured on augmented images — confirm in WKI_utils.
train_labels, train_predictions = LFWUtils.get_labels(model, train)
test_labels, test_predictions = LFWUtils.get_labels(model, test)

# Compare error on data the model has seen (train) vs. data it has not (test).
train_error = classification_error(train_labels, train_predictions)
test_error = classification_error(test_labels, test_predictions)
print(f"train error: {train_error:.4f}, test error: {test_error:.4f}")

display_confusion_matrix(train_labels, train_predictions, display_labels=LFWUtils.LABELS)
display_confusion_matrix(test_labels, test_predictions, display_labels=LFWUtils.LABELS)

## CNNs

## Object Detection

- Regression + Classification
