In [1]:
import torch
import torch.nn as nn
import torch.functional as F

##### Basic Convolutional NN

The first NN used in our experiment is a basic NN featuring three convolutional layers, one linear layer, and pooling and dropout layers.

TODO: Include diagram here

In [2]:
class convNN2(torch.nn.Module):
    """Small convolutional network that regresses six values per image.

    Three convolution + ReLU + max-pool stages are followed by a global
    average pool, dropout and a single linear layer with 6 outputs.
    The first convolution expects 3-channel input.
    """

    def __init__(self):
        super(convNN2, self).__init__()

        self.conv1 = nn.Conv2d(3, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3)

        self.fc1 = nn.Linear(128, 6)

        self.pool = nn.MaxPool2d(2, 2)

        # Dropout is applied to the flattened (batch, 128) feature vector, so
        # plain element-wise Dropout is used. Dropout2d is meant for inputs
        # with spatial dimensions and warns (deprecated) on 2-D input.
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Run a batch of images through the network.

        Args:
            x: 4-D image batch; the channel dimension must be 3.

        Returns:
            Tensor of shape (batch, 6).
        """
        # `nn.functional` is used explicitly: the activation and pooling
        # functions live in torch.nn.functional, not torch.functional.
        relu = nn.functional.relu

        x = self.pool(relu(self.conv1(x)))
        x = self.pool(relu(self.conv2(x)))
        x = self.pool(relu(self.conv3(x)))

        # Global average pool collapses each feature map to one value, giving
        # a fixed-size (batch, 128) vector regardless of the input resolution.
        batch_size = x.shape[0]
        x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(batch_size, -1)
        x = self.dropout(x)

        return self.fc1(x)

### Dense Net

This network is based on the pytorch implementation of [densenet121](https://pytorch.org/vision/main/models/generated/torchvision.models.densenet121.html). A dense net was chosen as it is a common network used in image recognition and classification tasks.

Since the default pytorch implementation produces an output tensor of shape [1000,], a few linear layers were added to reduce this to a [6,] output, i.e. the (x, y) coordinates of just three landmarks.




In [3]:
class denseNN(nn.Module):
    """DenseNet-121 backbone followed by a three-layer head with 6 outputs."""

    def __init__(self, device):
        """Build the backbone and the regression head.

        Args:
            device: Not used inside the constructor; kept so existing
                ``denseNN(device)`` calls keep working. Callers move the
                model themselves with ``.to(device)``.
        """
        super(denseNN, self).__init__()
        # densenet121 does not ship with torch itself; torch.hub fetches it
        # from the torchvision repository (tag v0.10.0), which needs network
        # access unless the repository is already in the local hub cache.
        # pretrained=False means no pretrained weights are loaded.
        self.dense121 = torch.hub.load('pytorch/vision:v0.10.0', 'densenet121', pretrained=False)
        # Head that reduces the backbone's 1000 outputs down to 6 values.
        self.fc1 = nn.Linear(1000, 600)
        self.fc2 = nn.Linear(600, 100)
        self.fc3 = nn.Linear(100, 6)

    def forward(self, x):
        """Return a (batch, 6) tensor for the input batch ``x``."""
        # `nn.functional.relu` is used explicitly: relu lives in
        # torch.nn.functional, not torch.functional.
        relu = nn.functional.relu

        x = relu(self.dense121(x))
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        return self.fc3(x)

## Training Process and Validation
- include hyperparameters and issues with learning rate.



First some imports and class definitions

In [4]:
import copy
import json
import os
import time
from argparse import ArgumentParser

import numpy as np
import torch.optim as optim
from torch.utils.data import DataLoader

In [5]:
class Timer():
    """Stopwatch that reports elapsed wall-clock time as an "Xh Ym Zs" string."""

    def __init__(self):
        # Timing begins at construction; start() can be used to restart it.
        self.start_time = time.time()

    def start(self):
        """Restart the stopwatch from the current time."""
        self.start_time = time.time()

    def elapsed_time(self):
        """Return the time since the last (re)start as "{h}h {m}m {s}s"."""
        elapsed = time.time() - self.start_time
        # Seconds left over once whole hours have been removed.
        within_hour = elapsed % 3600

        whole_hours = int(elapsed / 3600)
        whole_minutes = int(within_hour / 60)
        whole_seconds = int(within_hour % 60)

        return f"{whole_hours}h {whole_minutes}m {whole_seconds}s"

### Validation

The evaluate function is used for both validation and testing purposes. Here we calculate the straight-line distance between each coordinate output by the model and the corresponding real label coordinate.

Average distance was chosen over MSE or other similar error functions as it is easier to interpret.

The function also produces the standard deviation of these scores, as it may provide some useful information in terms of variance. A high variance would suggest the model is simply guessing in a tight area rather than producing unique coordinates for each image.

In [6]:
def evaluate(model, valid_set_path, device, batch_size=500):
    """Score ``model`` by the straight-line distance between predicted and true landmarks.

    The distance is computed per landmark and per image over the whole
    validation set, then summarised.

    Args:
        model: Network producing 6 values per image, read as three (x, y) pairs.
        valid_set_path: Path handed to ``CustomImageDataset`` to select the
            validation images.
        device: Device the batches are moved to before the forward pass.
        batch_size: Number of images per validation batch (default 500).

    Returns:
        ``(mean, std)`` of all landmark distances as Python floats.

    Raises:
        ValueError: If the validation set yields no batches.
    """
    UTKFace = CustomImageDataset(valid_set_path, 'UTKFace')
    # Order does not matter because every batch contributes to the result,
    # so shuffling is switched off to keep evaluation deterministic.
    valid_set = DataLoader(UTKFace, batch_size=batch_size, shuffle=False)

    land_idx = [8, 30, 39]  # The labels we are training for
    distances = []

    # We calculate the distance ourselves because MSE loss does not allow us
    # to take the square root of each landmark's term individually.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, _, _, _, landmarks in valid_set:
                images, landmarks = images.to(device), landmarks.to(device)

                outputs = model(images).view([-1, 3, 2])  # organise into (x, y) pairs

                # Assumes landmarks is (batch, n_landmarks, 2), matching the
                # (batch, 3, 2) view of the outputs above.
                squared = torch.square(outputs - landmarks[:, land_idx])
                # Sum over the last axis (x^2 + y^2) so every landmark gets
                # its own Euclidean distance: result is (batch, 3).
                distances.append(torch.sqrt(squared.sum(dim=-1)))
    finally:
        # Put the model back into whichever mode the caller had it in.
        model.train(was_training)

    if not distances:
        raise ValueError(f"Validation set '{valid_set_path}' produced no batches")

    all_distances = torch.cat(distances)
    return torch.mean(all_distances).item(), torch.std(all_distances).item()

#### Now for the training

Here validation is only performed every 20 iterations as it adds a significant amount of time to the training process. Validating at this reduced frequency still gives us enough points to graph the results. Each time validation is performed we check the current set of weights against the previous best scoring model. If the current weights perform better we update the best model accordingly and record which iteration it occurred in.

SGD does not guarantee that the final epoch and iteration produce the best model. As such, validating regularly allows us to choose the best model. While we cannot do this every iteration due to the overhead, we assume validating regularly will yield a set of weights close to the optimal weights.

In [7]:
def train(model, train_loader, lr, device, valid_set, epochs=5, valid_every=20):
    """Train ``model`` with SGD on MSE loss, keeping the best validated weights.

    Validation runs through ``evaluate`` every ``valid_every`` iterations of
    each epoch (iteration 0 included). Whenever the mean validation distance
    improves, a deep copy of the model is kept as the best model.

    Args:
        model: Network producing 6 values per image.
        train_loader: Sized iterable of batches shaped
            ``(images, age, gender, race, landmarks)``.
        lr: Learning rate for the SGD optimizer.
        device: Device the batches are moved to.
        valid_set: Path of the validation file, passed straight to ``evaluate``.
        epochs: Number of passes over ``train_loader``.
        valid_every: Validate on iterations where ``i % valid_every == 0``.

    Returns:
        ``(best_model, best_scores, filtered_scores)``:
        the best-scoring model (the live model if none improved);
        a dict with keys ``iteration``, ``mean``, ``std`` and ``loss_list``
        (every training loss); and an array with one ``[iteration, mean, std]``
        row per validation.

    Raises:
        ValueError: If ``valid_every`` is smaller than 1.
    """
    if valid_every < 1:
        raise ValueError("valid_every must be a positive integer")

    loss_func = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)

    # Initialise somewhere to save data for later graphing; rows stay NaN
    # for iterations on which no validation was run.
    batches = len(train_loader)
    scores = np.full([batches * epochs, 3], np.nan)

    # Infinity as the starting score so the first validation always counts
    # as an improvement, whatever the scale of the distances.
    best_model = model
    best_scores = {"iteration": 0,
                   "mean": float("inf"),
                   "std": float("inf"),
                   "loss_list": []}

    land_idx = [8, 30, 39]  # The indexes of the landmarks we are training with

    timer = Timer()
    timer.start()

    for epoch in range(epochs):
        for i, data in enumerate(train_loader, 0):
            images, _, _, _, landmarks = data   # images, age, gender, race, landmarks
            images, landmarks = images.to(device), landmarks.to(device)

            # Zero parameter gradients
            optimizer.zero_grad()

            outputs = model(images)
            loss = loss_func(outputs, landmarks[:, land_idx].reshape(-1, 6))
            best_scores["loss_list"].append(loss.item())    # Record for graphing later
            loss.backward()
            optimizer.step()

            # Validation is only performed periodically due to its high overhead.
            if i % valid_every == 0:
                step = (epoch * batches) + i    # global iteration index
                mean, std = evaluate(model, valid_set, device)
                scores[step] = (step, mean, std)
                print(f"[{timer.elapsed_time()}] Epoch: {epoch}, iteration: {i}, loss: {loss.item()}, mean: {mean}, std: {std}")

                # If the current model is the best we have seen so far, preserve the weights
                if mean < best_scores["mean"]:
                    best_model = copy.deepcopy(model)    # We need to copy to preserve weights
                    best_scores["iteration"] = step
                    best_scores["mean"] = mean
                    best_scores["std"] = std

    # Remove iterations where we did not do any validation
    filtered_scores = scores[~np.isnan(scores).any(axis=1)]

    return best_model, best_scores, filtered_scores

Since the code in this notebook was originally written as a script driven by CLI arguments, we instead put the arguments in a string and have the `ArgumentParser` parse that string.

* -f, --train_file
    - Used to specify the path to a training file. These should be text files with a list of images. For example `ll_training_75-25_split.txt` indicates the model should use the training set with the 75-25 gender split.
* -vf, --validation_file
    - Like the train file, this argument is used to indicate which subset of `UTKFace` to use for validation.
* -b, --batch
    - Set the batch size for training. All models were trained on a batch size of 32.
* -m, --model
    - Choose which model to train.
    - The models available are:
        - "convNN2"
        - "dense"
        - "resnet18"
        - "resnet34"
        - "resnet50"
* -lr, --learning_rate
    - Specify the learning rate.
* --cuda
    - Including this argument will enable training on cuda.
* -e, --epochs
    - Specify the number of epochs to train for.

In [9]:
# Stand-in for the command line the original train.py script was run with.
arg_str = "train.py -b 32 -lr 0.0001 --cuda -f ll_training_75-25_split.txt -vf ll_validation_75-25_split.txt -e 50 -m dense"

# The first token is the program name; the remaining tokens are the options.
prog_name, *cli_args = arg_str.split()

# Read in args
parser = ArgumentParser(prog=prog_name)
parser.add_argument("-f", "--train_file",
                    help="Path to data file.", 
                    metavar="FILE_PATH", 
                    default="landmark_list.txt")
parser.add_argument("-vf", "--validation_file",
                    help="Choose file to use for validation.",
                    metavar="FILE_PATH",
                    default="landmark_list.txt")
parser.add_argument("-b", "--batch", 
                    help="Batch size for training.", 
                    type=int, 
                    metavar="INT",
                    default=64)
parser.add_argument("-m", "--model",
                    help="Choose which model structure to use.",
                    default="convNN2",
                    metavar="MODEL_NAME")
parser.add_argument("-lr", "--learning_rate",
                    help="Learning rate to run the optimizer function with.",
                    default=0.0001,
                    type=float,
                    metavar="FLOAT")
parser.add_argument("--cuda",
                    help="Add this argument to run the code using GPU acceleration.",
                    action="store_true")
parser.add_argument("-e", "--epochs",
                    help="Dictate number of epochs to train for.",
                    type=int,
                    metavar="INT",
                    default=5)

# Parse the tokens from arg_str explicitly. Without an argument list argparse
# reads sys.argv, which inside a notebook holds the kernel's own options
# (including a "-f <connection file>" that would be taken as --train_file).
args, unknown_args = parser.parse_known_args(cli_args)

In [10]:
# Fall back to the CPU unless CUDA was requested and is actually available.
device = "cuda" if args.cuda and torch.cuda.is_available() else "cpu"

# Each constructor is wrapped in a lambda so that only the selected model is
# built, and only its name has to be defined in the current session.
model_builders = {
    "convNN2": lambda: convNN2(),
    "resnet18": lambda: resnet18(),
    "resnet34": lambda: resnet34(),
    "resnet50": lambda: resnet50(),
    "dense": lambda: denseNN(device),
}
if args.model not in model_builders:
    # Fail here with a clear message instead of passing model=None to train().
    raise ValueError(f"Unknown model '{args.model}'; choose one of {sorted(model_builders)}")
model = model_builders[args.model]().to(device)

# NOTE(review): CustomImageDataset and the resnet* constructors are not
# defined in the cells above; they must be defined or imported before this
# cell is run.
UTKFace = CustomImageDataset(args.train_file, 'UTKFace')
train_dataloader = DataLoader(UTKFace,
                              batch_size=args.batch,
                              shuffle=True)

NameError: name 'CustomImageDataset' is not defined

In [None]:
# Announce the run configuration before the training loop starts.
print(f"Training {args.model} from {args.train_file} with batch_size={args.batch}\n")

# Train model. train() hands back the best-scoring model, its summary dict
# and the per-validation scores used for plotting.
model, info, plots = train(
    model,
    train_dataloader,
    lr=args.learning_rate,
    device=device,
    valid_set=args.validation_file,
    epochs=args.epochs,
)

In [None]:
# Save the model weights and the training/validation results.
# The filename encodes the model, training file, batch size, epoch count and
# learning rate. Only the base name of the training file is used so that a
# path with directories cannot end up inside the filename.
train_name = os.path.basename(args.train_file)
filename = f"{args.model}_{train_name}_batch{args.batch}_ep{args.epochs}_lr{args.learning_rate}"
model_path = f"./models/{filename}.pt"
scores_path = f"./model_scores/{filename}.csv"
info_path = f"./model_infos/{filename}.json"

# Create the output directories on first use; saving fails if they are missing.
for out_path in (model_path, scores_path, info_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

torch.save(model.state_dict(), model_path)
np.savetxt(scores_path, plots, delimiter=",")

# Record the run settings alongside the scores returned by train().
info["epochs"] = args.epochs
info["batch"] = args.batch

with open(info_path, "w") as outfile:
    json.dump(info, outfile)

results

marcos stuff