<a href="https://colab.research.google.com/github/Apoak/Deep-Learning-Projects/blob/main/Facial_keypoints.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import torch
import torch.nn as nn
import os
import sklearn
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Fetch the dataset archive only when it is not already present in the working
# directory, so re-running the notebook does not download it again.
if not os.path.exists('facial_keypoints.npz'):
  !wget -O facial_keypoints.npz "https://www.dropbox.com/scl/fi/27qggijmythfjg04s24xq/facial_keypoints.npz?rlkey=h91gwodhrfuz8hrc7ux9qnq7s&dl=1"

**Checking out the data**

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Open the archive and pull out the two arrays used throughout the notebook.
data = np.load('facial_keypoints.npz')
base_images, base_keypoints = (data[key] for key in ('images', 'keypoints'))

# Show both array shapes, then the raw pixel values of one sample image.
print(base_images.shape, base_keypoints.shape, base_images[12], sep='\n')

In [None]:
# 5x5 grid of the first 25 faces with their ground-truth keypoints overlaid.
fig, axes = plt.subplots(nrows=5, ncols=5, figsize=(10, 10))

for ax, image, points in zip(axes.flat, base_images, base_keypoints):
    # Even positions are plotted as x coordinates, odd positions as y.
    xs, ys = points[0::2], points[1::2]
    ax.imshow(image.squeeze(), cmap='gray')
    ax.scatter(xs, ys, s=5, marker='.', c='m')
    ax.axis('off')

plt.tight_layout()
plt.show()

**Preprocessing the data:**

In [None]:
# Before min max
# Report shape, dtype and value range of the raw arrays. The keypoints use the
# NaN-aware min/max because some keypoint entries are missing (NaN).
print(f"Keypoints: {base_keypoints.shape}, {base_keypoints.dtype}, {np.nanmin(data['keypoints'])},  {np.nanmax(data['keypoints'])}")
print(f"Images: {base_images.shape}, {base_images.dtype}, {np.min(data['images'])},  {np.max(data['images'])}")

Keypoints: (7049, 30), float32, 0.6865919828414917,  95.9356460571289
Images: (7049, 1, 96, 96), int64, 0,  255


In [None]:
# Rescale pixel intensities with scikit-learn's MinMaxScaler.
# Import the class explicitly: a bare `import sklearn` is not guaranteed to
# load the `sklearn.preprocessing` submodule in a fresh kernel.
from sklearn.preprocessing import MinMaxScaler

# Target range (0, 96) puts pixel values on the same numeric scale as the
# keypoint coordinates printed above (roughly 0..96).
min_max_scaler = MinMaxScaler((0, 96))

# MinMaxScaler expects a 2-D (samples, features) array, so flatten each image
# into one row. Derive the sample count from the data instead of hard-coding it.
# NOTE(review): MinMaxScaler scales every column (i.e. every pixel position)
# independently across the dataset - confirm this per-pixel scaling is intended
# rather than a single global 0..255 rescale.
num_samples = base_images.shape[0]
images_reshaped = base_images.reshape(num_samples, -1)
images_scaled = min_max_scaler.fit_transform(images_reshaped)

# Restore the original (N, C, H, W) layout for the convolutional network.
images_scaled = images_scaled.reshape(base_images.shape)

# Keypoints are intentionally left unscaled; they are regressed in pixel units.
print(f"Scaled Images: {images_scaled.shape}, {images_scaled.dtype}, {np.min(images_scaled)},  {np.max(images_scaled)}")


**Train and test split:**

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Convert the scaled images and the raw keypoints to float32 tensors on the GPU.
X = torch.tensor(images_scaled).float().cuda()
y = torch.tensor(base_keypoints).float().cuda()
print(X.shape)
print(y.shape)

# Hold out 10% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
print(X_train.dtype)

**Preparing the data loader:**

In [None]:
# Mini-batch size for training.
batch_size = 32 # Think about this one!

# Pair each training image with its keypoints and serve them in shuffled batches.
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# Mini-batch size for evaluation.
batch_size = 32 # Think about this one!

# Held-out samples are served in a fixed order (no shuffling).
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

**Create a CNN:**

In [None]:
import torch.nn.functional as F

**Citation:** Looked at this for some guidance https://www.digitalocean.com/community/tutorials/vgg-from-scratch-pytorch

In [None]:
class Cnn(nn.Module):
    """VGG-style regressor for 1-channel 96x96 images.

    Six 3x3 convolution stages (channels 4, 4, 8, 8, 16, 16) with a 2x2 max-pool
    after every second stage, followed by two 4096-unit dense layers and a
    linear output layer producing ``num_classes`` values per image.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels, pool=False, flatten=False):
        """Build Conv3x3(padding=1) -> ReLU, optionally followed by max-pool and flatten."""
        modules = [
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.ReLU(),
        ]
        if pool:
            modules.append(nn.MaxPool2d(2, 2))
        if flatten:
            modules.append(torch.nn.Flatten())
        return nn.Sequential(*modules)

    def __init__(self, num_classes = 30):
        super().__init__()
        stage = self._conv_stage

        # Convolutional feature extractor; spatial size halves at each pool.
        self.layer1 = stage(1, 4)
        self.layer2 = stage(4, 4, pool=True)                   # 96 -> 48
        self.layer3 = stage(4, 8)
        self.layer4 = stage(8, 8, pool=True)                   # 48 -> 24
        self.layer5 = stage(8, 16)
        self.layer6 = stage(16, 16, pool=True, flatten=True)   # 24 -> 12, then flatten

        # Dense head: 12x12 feature maps with 16 channels feed the first layer.
        flat_features = 12 * 12 * 16
        self.fc = nn.Sequential(nn.Linear(flat_features, 4096), nn.ReLU())
        self.fc1 = nn.Sequential(nn.Linear(4096, 4096), nn.ReLU())

        # Output layer: raw regression values, no activation.
        self.fc2 = nn.Sequential(nn.Linear(4096, num_classes))

    def forward(self, x):
        """Run ``x`` through every stage in order and return the final output."""
        pipeline = (
            self.layer1, self.layer2, self.layer3,
            self.layer4, self.layer5, self.layer6,
            self.fc, self.fc1, self.fc2,
        )
        out = x
        for block in pipeline:
            out = block(out)
        return out

# Instantiate the baseline network and move its parameters to the GPU.
cnn = Cnn().cuda()
# Optional shape check (left disabled): push one training batch through the
# network and inspect the output.
# x, y = next(iter(train_loader))
# #print(x.shape)
# print(cnn(x.cuda()).shape)
# print(cnn(x.cuda()))



In [None]:
def masked_mae_loss(y_pred,y_true):
    """Differentiable mean absolute error over the non-NaN entries of ``y_true``.

    Args:
        y_pred: Predicted values (tensor).
        y_true: Target values of the same shape; NaN marks a missing target.

    Returns:
        A scalar tensor: the sum of absolute errors at valid positions divided
        by the number of valid positions. If every target is NaN the result is
        0 (still attached to the graph, with zero gradient) instead of NaN.
    """
    # Follow the prediction's device rather than assuming CUDA is available.
    y_true = y_true.to(y_pred.device)

    valid = ~torch.isnan(y_true)

    # Replace missing targets with the (detached) prediction so those entries
    # contribute exactly zero error and zero gradient, without ever doing
    # arithmetic on NaN values.
    filled_true = torch.where(valid, y_true, y_pred.detach())
    abs_error = torch.abs(filled_true - y_pred)

    # Clamp the divisor so a batch with no valid targets gives 0 rather than 0/0.
    num_valid = valid.sum().clamp(min=1)

    return abs_error.sum() / num_valid

**Citation:** Used chatGPT to help me deal with the nan values in the y_true.

In [None]:
# NaN-aware evaluation metrics: entries whose target is NaN are excluded.
def masked_mae(y_pred, y_true):
    """Mean absolute error over positions where ``y_true`` is not NaN.

    Returns a zero scalar tensor on ``y_pred``'s device when no target is valid.
    """
    valid = torch.isnan(y_true).logical_not()
    if not valid.any():
        # Nothing to score.
        return torch.tensor(0.0, device=y_pred.device)
    errors = (y_pred[valid] - y_true[valid]).abs()
    return errors.mean()

def masked_mse(y_pred, y_true):
    """Mean squared error over positions where ``y_true`` is not NaN.

    Returns a zero scalar tensor on ``y_pred``'s device when no target is valid.
    """
    valid = torch.isnan(y_true).logical_not()
    if not valid.any():
        # Nothing to score.
        return torch.tensor(0.0, device=y_pred.device)
    residuals = y_pred[valid] - y_true[valid]
    return (residuals ** 2).mean()


In [None]:
from sklearn.metrics import r2_score
import numpy as np

def masked_r2(y_pred, y_true):
    """R^2 score computed only over entries whose target is not NaN.

    Both tensors are detached, moved to the CPU and converted to NumPy before
    scoring with ``r2_score``. Returns ``np.nan`` when no target is valid.
    """
    predictions = y_pred.detach().cpu().numpy()
    targets = y_true.detach().cpu().numpy()

    keep = np.logical_not(np.isnan(targets))  # True where a target exists
    if not keep.any():
        return np.nan

    return r2_score(targets[keep], predictions[keep])


In [None]:
#test cnn
def test_cnn(cnn, test_loader):
    # cnn.eval()
  mae_total, mse_total = 0, 0
  count = 0

  for x_batch, y_true in test_loader:
    x_batch = x_batch.cuda()
    y_true = y_true.cuda()
    y_pred = cnn(x_batch.cuda())

    mae_total += masked_mae(y_pred, y_true)
    mse_total += masked_mse(y_pred, y_true)
    count += 1

  print("Final MAE:", mae_total / count)
  print("Final MSE:", mse_total / count)
  print("Final RÂ² Score:", masked_r2(y_pred, y_true.cuda()))

**Test the model before training**

In [None]:
# Baseline metrics for the randomly initialised network, for later comparison.
test_cnn(cnn, test_loader)

In [None]:
# Train cnn
def train_cnn(cnn, train_loader, epochs, optimizer):
  """Train ``cnn`` with the masked MAE loss and print the mean loss per epoch.

  Args:
      cnn: The ``nn.Module`` to train. It is put into training mode.
      train_loader: Iterable yielding ``(inputs, targets)`` batches.
      epochs: Number of full passes over ``train_loader``.
      optimizer: Optimizer used for the updates; it should be constructed over
          the parameters of ``cnn``.

  Returns:
      None. Progress is printed once per epoch.
  """
  # Run on whatever device the model lives on; fall back to CPU for a
  # parameter-free module.
  first_param = next(cnn.parameters(), None)
  device = first_param.device if first_param is not None else torch.device("cpu")

  cnn.train()

  for epoch in range(epochs):
    loss_sum = 0.0
    num_batches = 0
    for x_batch, y_batch in train_loader:
      x_batch = x_batch.to(device).float()
      y_batch = y_batch.to(device)

      optimizer.zero_grad()
      # Call the module itself (not .forward) so registered hooks still run.
      y_pred = cnn(x_batch)
      loss = masked_mae_loss(y_pred, y_batch)
      loss.backward()
      optimizer.step()

      loss_sum += loss.item()
      num_batches += 1

    # Report the epoch average rather than whichever batch happened to be
    # last; an empty loader yields NaN instead of an unbound-variable error.
    epoch_loss = loss_sum / num_batches if num_batches else float("nan")
    print(f"Epoch: {epoch}, Loss: {epoch_loss}")

In [None]:
# Train the baseline network for 70 epochs with plain SGD (learning rate 3e-4).
train_cnn(cnn, train_loader, epochs=70, optimizer = torch.optim.SGD(cnn.parameters(),lr=0.0003))

**Test the model after training**

In [None]:
# Same held-out metrics as before training, now for the trained baseline.
test_cnn(cnn, test_loader)

**Test images with Ground truth keypoints**
Truth values: Pink
Predicted: Blue


In [None]:
# 3x3 grid of test images: ground truth in magenta, baseline predictions in blue.
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(10, 10))

# Take the first test batch and predict its keypoints on the GPU.
dataiter = iter(test_loader)
test_images, test_labels = next(dataiter)
pred_labels = cnn(test_images.cuda()).cpu().detach().numpy()

for ax, image, truth, pred in zip(axes.flat, test_images, test_labels, pred_labels):
    ax.imshow(image.cpu().squeeze(), cmap='gray')
    # Even positions are plotted as x coordinates, odd positions as y.
    ax.scatter(truth[0::2].cpu(), truth[1::2].cpu(), s=20, marker='.', c='m')
    ax.scatter(pred[0::2], pred[1::2], s=20, marker='.', c='b')
    ax.axis('off')

plt.tight_layout()
plt.show()

**Predicted keypoints over Training Images:**
Truth Values: Pink
Predicted values: Blue

In [None]:
# 3x3 grid of training images: ground truth in magenta, baseline predictions in blue.
fig, axes = plt.subplots(3, 3, figsize=(10, 10))
dataiter = iter(train_loader)
train_images, truth_labels = next(dataiter)

# Inference only: skip autograd bookkeeping while predicting.
# `train_labels` holds the *predicted* keypoints for this batch as a NumPy array.
with torch.no_grad():
    train_labels = cnn(train_images[:32].cuda())
train_labels = train_labels.cpu().numpy()

for i, ax in enumerate(axes.flat):
    ax.imshow(train_images[i][0].cpu(), cmap='gray')  # Display the image in grayscale
    # Even positions are plotted as x coordinates, odd positions as y.
    ax.scatter(truth_labels[i][0::2].cpu(), truth_labels[i][1::2].cpu(), s=20, marker='.', c='m')  # Ground truth
    ax.scatter(train_labels[i][0::2], train_labels[i][1::2], s=20, marker='.', c='b')  # Predictions
    ax.axis('off')  # Hide axes

plt.tight_layout()
plt.show()

**Analysis:**
While the MAE and MSE are not large and the R^2 is good, projecting the predicted keypoints onto a test or training image makes it obvious that even small differences between the ground-truth and predicted pixel coordinates leave the keypoints looking visually incorrect.

The network seems to be able to identify keypoints around the eyes much better than keypoints around the mouth.

With regard to overfitting, the network appears to place keypoints more accurately on training images than on test images. This is visible in the two image grids above.

The model seems to have a higher level of variance.

**Improved CNN:**
Added dropout and batch normalization

In [None]:
class Improved_Cnn(nn.Module):
    """Baseline architecture with batch normalisation and dropout added.

    Each 3x3 convolution is followed by ``BatchNorm2d`` before the ReLU, and
    each of the two 4096-unit dense layers is preceded by ``Dropout(0.2)``.
    The final linear layer outputs ``num_classes`` values per image.
    """

    @staticmethod
    def _conv_bn_stage(in_channels, out_channels, pool=False, flatten=False):
        """Build Conv3x3(padding=1) -> BatchNorm -> ReLU, optionally followed by max-pool and flatten."""
        modules = [
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        if pool:
            modules.append(nn.MaxPool2d(2, 2))
        if flatten:
            modules.append(torch.nn.Flatten())
        return nn.Sequential(*modules)

    def __init__(self, num_classes = 30):
        super().__init__()
        stage = self._conv_bn_stage

        # Convolutional feature extractor; spatial size halves at each pool.
        self.layer1 = stage(1, 4)
        self.layer2 = stage(4, 4, pool=True)                   # 96 -> 48
        self.layer3 = stage(4, 8)
        self.layer4 = stage(8, 8, pool=True)                   # 48 -> 24
        self.layer5 = stage(8, 16)
        self.layer6 = stage(16, 16, pool=True, flatten=True)   # 24 -> 12, then flatten

        # Dense head with dropout in front of each hidden linear layer.
        flat_features = 12 * 12 * 16
        self.fc = nn.Sequential(
            nn.Dropout(.2), nn.Linear(flat_features, 4096), nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(.2), nn.Linear(4096, 4096), nn.ReLU())

        # Output layer: raw regression values, no activation.
        self.fc2 = nn.Sequential(nn.Linear(4096, num_classes))

    def forward(self, x):
        """Run ``x`` through every stage in order and return the final output."""
        pipeline = (
            self.layer1, self.layer2, self.layer3,
            self.layer4, self.layer5, self.layer6,
            self.fc, self.fc1, self.fc2,
        )
        out = x
        for block in pipeline:
            out = block(out)
        return out

# Instantiate the improved network and move its parameters to the GPU.
improved_cnn = Improved_Cnn().cuda()
# Optional shape check (left disabled). NOTE(review): the snippet below still
# calls `cnn`, not `improved_cnn` - update it before re-enabling.
# x, y = next(iter(train_loader))
# #print(x.shape)
# print(cnn(x.cuda()).shape)
# print(cnn(x.cuda()))



**Test before training:**

In [None]:
# Baseline metrics for the untrained improved network, for later comparison.
test_cnn(improved_cnn, test_loader)

**Train improved cnn:**

In [None]:
# The optimizer must be built over improved_cnn's own parameters; building it
# over `cnn.parameters()` would leave improved_cnn's weights untouched.
train_cnn(improved_cnn, train_loader, epochs=130, optimizer = torch.optim.Adam(improved_cnn.parameters(),lr=0.0003, weight_decay= .001))

**Test improved cnn after training:**

In [None]:
# Same held-out metrics as before training, now for the trained improved network.
test_cnn(improved_cnn, test_loader)

**Test images with predicted keypoints:**
Pink: Truth
Blue: Predicted

In [None]:
# 3x3 grid of test images: ground truth in magenta, improved-model predictions in blue.
fig, axes = plt.subplots(3, 3, figsize=(10, 10))
dataiter = iter(test_loader)
test_images2, test_labels2 = next(dataiter)

# Predict with the improved model on the batch fetched in this cell.
# Dropout and batch norm behave differently in training mode, so switch to
# eval mode for inference and restore the previous mode afterwards.
was_training = improved_cnn.training
improved_cnn.eval()
with torch.no_grad():
    pred_labels2 = improved_cnn(test_images2.cuda())
improved_cnn.train(was_training)

pred_labels2 = pred_labels2.cpu().numpy()

for i, ax in enumerate(axes.flat):
    ax.imshow(test_images2[i].cpu().squeeze(), cmap='gray')  # Display the image in grayscale
    # Even positions are plotted as x coordinates, odd positions as y.
    ax.scatter(test_labels2[i][0::2].cpu(), test_labels2[i][1::2].cpu(), s=20, marker='.', c='m')  # Ground truth
    ax.scatter(pred_labels2[i][0::2], pred_labels2[i][1::2], s=20, marker='.', c='b')  # Predictions
    ax.axis('off')  # Hide axes

plt.tight_layout()
plt.show()

**predicted keypoints over training images:**
Truth: Pink
Predicted: blue

In [None]:
# 3x3 grid of training images: ground truth in magenta, improved-model predictions in blue.
fig, axes = plt.subplots(3, 3, figsize=(10, 10))
dataiter = iter(train_loader)
train_images2, truth_labels2 = next(dataiter)

# Predict with the improved model (not the baseline `cnn`).
# Dropout and batch norm behave differently in training mode, so switch to
# eval mode for inference and restore the previous mode afterwards.
# `train_labels2` holds the *predicted* keypoints for this batch.
was_training = improved_cnn.training
improved_cnn.eval()
with torch.no_grad():
    train_labels2 = improved_cnn(train_images2[:32].cuda())
improved_cnn.train(was_training)

train_labels2 = train_labels2.cpu().numpy()

for i, ax in enumerate(axes.flat):
    ax.imshow(train_images2[i][0].cpu(), cmap='gray')  # Display the image in grayscale
    # Even positions are plotted as x coordinates, odd positions as y.
    ax.scatter(truth_labels2[i][0::2].cpu(), truth_labels2[i][1::2].cpu(), s=20, marker='.', c='m')  # Ground truth
    ax.scatter(train_labels2[i][0::2], train_labels2[i][1::2], s=20, marker='.', c='b')  # Predictions
    ax.axis('off')  # Hide axes

plt.tight_layout()
plt.show()