In [2]:
# Define a split between training and validation data (since we were not provided with a separate validation dataset.)
# This split helps us calculate validation loss while experimenting with different trainign techniques. However, our final
# submission should be trained on all samples from the data we are given!
VAL_SPLIT = 25000

# Batch size is used to partition the training data into distinct batches which are used to train the CNN.
# Batch creation is facilitated by pytorch's DataLoader module :)
BATCH_SIZE = 64

# The number of epochs used to train the CNN.
EPOCHS = 10

# The loss function used during training
import torch.nn.functional as F
LOSS_FN = F.cross_entropy

# The learning rate used in the SGD that trains the CNN.
SGD_LEARNING_RATE = 0.1

# The momentum used in the SGD that trains the CNN.
SGD_MOMENTUM = 0.9

import Data # Project data is loaded here
from torch.utils.data import DataLoader, TensorDataset # PyTorch DataLoader and TensorDatasets ;;;)
import torch
import numpy as np

# Partition labeled data into train and val datasets
X_tmp = Data.labeled_images.reshape(-1, 3136)
y_tmp = Data.integer_labels() # Binary to number/letter, then number/letter to unique integer represntation for each label (conv net is only happy with 1D numerical outputs)
print(y_tmp)
X_val, y_val = X_tmp[VAL_SPLIT:], y_tmp[VAL_SPLIT:]
X_train, y_train = X_tmp[0:VAL_SPLIT], y_tmp[0:VAL_SPLIT]
print(f'X_train: {X_train.shape} \ny_train: {y_train.shape}')
print(f'X_val: {X_val.shape} \ny_val: {y_val.shape}')

# Transform the labeled train and val datasets into pytorch Tensors
X_train, y_train, X_val, y_val = map(
    torch.tensor, (X_train, y_train, X_val, y_val)
)

print(f'X_train tensor: {X_train} \ny_train tensor: {y_train}')

# Initialize pytorch TensorDatasets
train_ds = TensorDataset(X_train, y_train)
val_ds = TensorDataset(X_val, y_val)

# Initialize pytorch DataLoaders
train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True) # Shuffled to prevent correlation betwen batch ordering and model overfitting, see: https://pytorch.org/tutorials/beginner/nn_tutorial.html#add-validation
val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE)

[ 49 165  88 ... 112   8 108]
X_train: (25000, 3136) 
y_train: (25000,)
X_val: (5000, 3136) 
y_val: (5000,)
X_train tensor: tensor([[  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        ...,
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
        [  0., 255.,   0.,  ...,   0.,   0.,   0.]], dtype=torch.float64) 
y_train tensor: tensor([ 49, 165,  88,  ..., 155,  52, 241], dtype=torch.int32)


In [3]:
from torch import nn

# Calculate the loss for a single training batch
def calc_batch_loss(model, loss_func, xb, yb, opt=None):
    
    # FOR DEBUGGING: PLEASE REMOVE IF IT DOESN"T WORK
    xb = xb.float()
    yb = yb.long()

    # Make predictions and get loss
    predictions = model(xb)
    predictions = predictions.float()
    loss = loss_func(predictions, yb)

    # Backpropagate if an optimizer is specified
    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    # Return loss and length
    return loss.item(), len(xb)

# Fit an arbitrary model
def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            calc_batch_loss(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[calc_batch_loss(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)
        print(epoch, val_loss)

# Lambda PyTorch Module that can be inserted into a "Sequential"
# object to perform arbitrary lambda operations on NN layers.
class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)

### Define LeNET5 model

In [23]:
from torch import optim

class LeNet5(nn.Module):

    def __init__(self, n_classes, preprocess):
        super(LeNet5, self).__init__()

        self.preprocess = preprocess # Preprocess lambda
        
        self.feature_extractor = nn.Sequential( # Convolve to extract image features
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1),
            nn.Tanh()
        )

        self.classifier = nn.Sequential( # Fully connected layers to learn classification task on extracted features
            nn.Linear(in_features=120, out_features=84),
            nn.Tanh(),
            nn.Linear(in_features=84, out_features=n_classes),
        )

    def forward(self, x):
        x = self.preprocess(x) # Preprocess batch
        x = self.feature_extractor(x) # Extract features
        x = torch.flatten(x, 1) # Flatten to single layer
        logits = self.classifier(x) # Make prediction using fully-connected layers
        probs = F.softmax(logits, dim=1) # Softmax to get probabilities
        return logits#, probs

def preprocess_batch(batch):
    batch = batch.view(-1, 1, 56, 56) # Resize input to make it square
    downsample = nn.AdaptiveAvgPool2d((32, 32)) # Define a downsampling function
    batch = downsample(batch) # Downsample the batch's image data
    return batch

# Declare model
model = LeNet5(n_classes=260, preprocess=preprocess_batch)
opt = optim.SGD(model.parameters(), lr=0.9)
fit(15, model, F.cross_entropy, opt, train_dl, val_dl)

0 4.99194658203125
1 4.66392582244873
2 4.550736502838134
3 4.492142049407959
4 4.524450703430176
5 4.334594984436035
6 4.580164798736572
7 4.434666747283935
8 4.5520953269958495
9 4.214161065673828
10 4.245454286956787
11 4.376397064971924
12 4.321677252197266
13 4.2004441650390625
14 4.161697741699219


In [24]:
torch.no_grad() # Disable gradient before prediction!

predictions = model(X_val.float())
print(f'Prediction tensor: {predictions.shape}')

correct = 0
for i in range(0, len(y_val)):
    true_label = y_val[i]
    softmax_arr = predictions[i].detach().numpy()[0]
    prediction = np.argmax(softmax_arr)
    correct += int(true_label == prediction)

print(correct / len(y_val))


0.005
