# Imports and setup

In [23]:
# We can now load the dependencies
%matplotlib inline 
import torch 
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt 
import torchvision
import torchsummary
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.optim as optim

We can start by setting a seed for reproducibility

In [4]:
# Seed every random number generator this notebook imports, so that a fresh
# "Restart & Run All" reproduces the same numbers.
SEED = 0
np.random.seed(SEED)  # NumPy's global RNG (NumPy is imported alongside torch)
torch.manual_seed(SEED)  # kept as the last expression so the cell still shows the torch Generator

<torch._C.Generator at 0x183633c7d90>

# Pre-processing

In [None]:
## TODO Copy code in here when Enzo is done

# Hyperparameter choices

We create a cell to hold the hyperparameters of the model

In [33]:
# Tunable settings, gathered in one place so they are easy to find and change.
learning_rate = 1e-3    # step size handed to the Adam optimizer further down
batch_size = 100        # intended mini-batch size (the dataloaders are still placeholders)
num_epochs = 50         # number of training epochs
gamma_focal_loss = 2    # passed as `gamma` when the focal loss is created
n_loss = 10             # NOTE(review): not read by any visible cell - confirm its purpose

We can now create our dataloaders

In [None]:
# Placeholders for the six dataloaders: train / validation / test, once for
# the `gen_*` data and once for the `real_*` data. None of them is built yet,
# so every name is bound to None for now.
gen_train_loader = gen_val_loader = gen_test_loader = None
real_train_loader = real_val_loader = real_test_loader = None

# Model implementation

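We can start by loading a pre-trained VGG16 model. Here we load the full network (feature extractor and classification layers) so that we can inspect it; further below we keep only the feature extractor and drop the classification layers at the end.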

In [5]:
# Instantiate the complete VGG16 network with the IMAGENET1K_V1 pre-trained
# weights; it is only used for inspection in the next cell.
vgg16 = torchvision.models.vgg16(
    weights='VGG16_Weights.IMAGENET1K_V1',
)

We can now visualize its layers:

In [8]:
# Bare last expression: the notebook renders the module's layer-by-layer repr.
vgg16

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

Because we are looking for a pre-trained feature extractor here, we decide to only use the features part and freeze its weights. We can then add a few subsequent layers to fine tune predictions. We can thus define the following model:

In [27]:
class BaseModel(nn.Module):
    """Frozen, pre-trained VGG16 feature extractor followed by a trainable MLP head.

    Args:
        num_classes: Number of scores produced by the last linear layer.
        in_features: Size of the flattened feature map that enters the
            classifier. The default of 4608 keeps the original behaviour
            (presumably 512 channels x 3 x 3 - TODO confirm against the image
            resolution produced by the pre-processing). Pass another value
            when the input images have a different resolution.
    """

    def __init__(self, num_classes=13, in_features=4608):
        super().__init__()

        # Convolutional part of VGG16 only; VGG16's own classifier is not kept.
        self.features = torchvision.models.vgg16(weights='VGG16_Weights.IMAGENET1K_V1').features

        # Freeze the extractor so that only the head below receives gradients.
        self.features.requires_grad_(False)

        # Fully connected head: in_features -> 2304 -> 1152 -> 576 -> num_classes.
        self.classifier = nn.Sequential(
            nn.Linear(in_features, 2304),
            nn.ReLU(True),
            nn.Linear(2304, 1152),
            nn.ReLU(True),
            nn.Linear(1152, 576),
            nn.ReLU(True),
            nn.Linear(576, num_classes)
        )

    def forward(self, x):
        """Return the raw class scores (no softmax is applied) for a batch `x`."""
        x = self.features(x)
        x = torch.flatten(x, 1)  # keep dimension 0, merge all remaining dimensions
        return self.classifier(x)


# Training

We can start by finding the device to use for training:

In [29]:
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Test the device *type*: comparing the torch.device object itself with the
# string "cuda" is not a reliable check, so the cache was never cleared.
if DEVICE.type == "cuda":
    torch.cuda.empty_cache()  # release cached, currently unused GPU memory

We can then initialize and visualize our model:

In [31]:
# Build the network with its default arguments, then place it on DEVICE.
basemodel = BaseModel()
basemodel = basemodel.to(DEVICE)

# Bare last expression so the notebook renders the layer listing.
basemodel

BaseModel(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dila

We can then go ahead and define the loss function we will be using. We opt for a balanced focal loss instead of a regular cross-entropy loss, because the focal loss gives more importance to the examples that are harder to classify. We thus use the focal loss defined by the following formula:
<center><img src="focal loss.png"></center>

where gamma is a tunable hyperparameter. We also further add an alpha term to handle class imbalance, making our loss function a class-balanced focal loss, as shown in https://github.com/AdeelH/pytorch-multi-class-focal-loss.


In [39]:
# Per-class sample counts (13 classes), copied from the `samples_per_class`
# list in the "Extra code to use" section of this notebook.
# TODO: confirm these match the class distribution of the final training split.
class_counts = torch.tensor(
    [236400, 22944, 564, 624, 624, 312, 624, 25632, 500, 564, 436, 188, 624],
    dtype=torch.float32,
)

# One alpha weight per class, inversely proportional to the class frequency:
# total / (num_classes * count). The previous two-entry alpha could not be
# used with a 13-class model.
alpha = class_counts.sum() / (len(class_counts) * class_counts)

focal_loss = torch.hub.load(
    'adeelh/pytorch-multi-class-focal-loss',
    model='FocalLoss',
    alpha=alpha.to(DEVICE),  # same device as the model, which is moved to DEVICE
    gamma=gamma_focal_loss,
    reduction='mean',
    force_reload=False,
    verbose=False,
)

Finally, we need an accuracy metric to tune the hyperparameters of the model. We will opt for a balanced accuracy score, which is the average of the recall obtained on each class, so that every class counts equally regardless of how frequent it is. We can use scikit-learn's implementation: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.balanced_accuracy_score.html.

We can now define our optimizer. We opt for the Adam optimizer since it is a widely used default choice.

In [None]:
# Adam over the model's parameters, with the step size taken from the
# hyperparameter cell.
opt = optim.Adam(
    params=basemodel.parameters(),
    lr=learning_rate,
)

We can now proceed to train our model:

In [None]:
# for epoch in range(num_epochs):

# Extra code to use

We will also define Focal Loss as our loss function:

In [56]:
from balanced_loss import Loss

# NOTE(review): this cell reads `y_generated`, which is only created by a cell
# further down the notebook, so it fails on a fresh top-to-bottom run.
num_samples = len(y_generated)

# percentage_per_class = [73.75, 7.19, 1.41, 1.56, 1.56, 0.78, 1.56, 6.41, 1.25, 1.41, 1.09, 0.47, 1.56]
# samples_per_class = [int(num_samples * percentage / 100) for percentage in percentage_per_class]

# Per-class counts, scaled down by the number of labels currently loaded
# (integer division, truncating like the previous int(a / b)).
samples_per_class = [236400, 22944, 564, 624, 624, 312, 624, 25632, 500, 564, 436, 188, 624]
samples_per_class = [sample_count // num_samples for sample_count in samples_per_class]

print(samples_per_class)

# Focal loss without class re-weighting (class_balanced is off here).
fLoss = Loss(
    loss_type="focal_loss",
    # beta belongs in [0, 1) for the class-balanced re-weighting; presumably it
    # is only used once class_balanced is switched on - confirm.
    beta=0.999,
    fl_gamma=3,
    samples_per_class=samples_per_class,
    class_balanced=False,
)

# Move the loss to the notebook's DEVICE (a lowercase `device` is never defined
# in this notebook). Kept as the last expression so its repr is still shown.
fLoss.to(DEVICE)

[3693, 358, 8, 9, 9, 4, 9, 400, 7, 8, 6, 2, 9]


Loss()

## We got it running for EX0000. Let's try running the first 10 np arrays [EX0000-EX0009] and adding epochs as well (NOTE, for now this only works on CPU, not CUDA)

In [None]:
num_training_examples = 10  # number of EX_XXXX archives to load
rows_per_file = 64          # rows contributed by each archive (presumably one per board square - confirm)

# Pre-allocate the whole array so each archive is written straight into its slice.
X_generated = np.empty([rows_per_file*num_training_examples, 3, 130, 130])

for i in range(num_training_examples):
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
    # only use it on trusted files, and drop it if the arrays are plain numeric.
    # The `with` block closes the .npz archive as soon as the data is copied.
    with np.load("C:/Users/usuario/RecogniChess/Data Generation/Pre Processed Data Generated/Images/EX_%04d.npy.npz" % i, allow_pickle=True) as archive:
        X_generated[i*rows_per_file:(i+1)*rows_per_file] = archive['arr_0']

In [None]:
rows_per_file = 64  # labels contributed by each archive (same block size as the image array)

# Pre-allocate the label array; slices are filled in place instead of using
# np.concatenate / append, to save computation time.
y_generated = np.empty([rows_per_file*num_training_examples])

for i in range(num_training_examples):
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
    # only use it on trusted files.
    # The `with` block closes the .npz archive as soon as the labels are copied.
    with np.load("C:/Users/usuario/RecogniChess/Data Generation/Pre Processed Data Generated/Labels/EX_%04d.npy.npz"  % i, allow_pickle=True) as archive:
        y_generated[i*rows_per_file:(i+1)*rows_per_file] = archive['arr_0']

In [None]:
# Sanity check: show the shape of the image array, then of the label array.
for loaded_array in (X_generated, y_generated):
    print(loaded_array.shape)

In [None]:
from balanced_loss import Loss

# outputs and labels

num_samples = len(y_generated)

# percentage_per_class = [73.75, 7.19, 1.41, 1.56, 1.56, 0.78, 1.56, 6.41, 1.25, 1.41, 1.09, 0.47, 1.56]
# samples_per_class = [int(num_samples * percentage / 100) for percentage in percentage_per_class]

# Scaled per-class counts (13 classes), as printed by the earlier loss cell.
samples_per_class = [3693, 358, 8, 9, 9, 4, 9, 400, 7, 8, 6, 2, 9]
print(samples_per_class)

# class-balanced focal loss
fLoss = Loss(
    loss_type="focal_loss",
    # The class-balanced weighting is built on the "effective number of samples"
    # (1 - beta**n) / (1 - beta), which requires 0 <= beta < 1. The previous
    # value of 2 lies outside that range; 0.999 is a commonly used setting.
    beta=0.999,
    fl_gamma=3,
    samples_per_class=samples_per_class,
    class_balanced=True,
)

In [None]:
# This experiment runs on the CPU only (see the note in the heading above).
vgg16_softmax = vgg16_softmax.to("cpu")
num_epochs = 30

learning_rate = 0.01
optimizer = optim.Adam(vgg16_softmax.parameters(), lr=learning_rate)

# Put the model in training mode
vgg16_softmax.train()

for epoch in range(num_epochs):
    running_loss = 0.0  # sum of the batch losses seen in this epoch
    num_batches = 0

    for image, y_true in train_loader:
        # Keep the batch on the CPU, next to the model; the labels are cast to int64.
        image = image.to("cpu")
        y_true = y_true.to("cpu").long()

        # Forward pass
        output = vgg16_softmax(image)
        loss = fLoss(output, y_true)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate, so the reported value covers the whole epoch and not just
        # the last batch.
        running_loss += loss.item()
        num_batches += 1

    # Mean batch loss of the epoch; max(..., 1) guards against an empty loader.
    epoch_loss = running_loss / max(num_batches, 1)
    print(f"Epoch: {epoch}, Focal loss: {epoch_loss:.4f}")


In [None]:
from torchmetrics.functional import accuracy, f1_score

# NOTE(review): this cell relies on names that no visible cell of this notebook
# defines (`vgg16_softmax`, `train_data`, `val_data`, `transform`, `device`,
# `fLoss_train`, `fLoss_val`) - make sure they exist before running it.


def _prepare_example(image_vector):
    """Turn one stored image into a single-item batch on `device`.

    Same steps the training and validation loops previously duplicated: cast to
    float32, scale by 1/255 and back, truncate to uint8, apply `transform`, add
    a leading batch dimension and keep the first three entries along
    dimension 1 (presumably dropping an alpha channel - confirm).
    """
    pixels = np.array(image_vector).astype(np.float32) / 255.0
    pixels = (pixels * 255).astype(np.uint8)
    batch = transform(pixels).unsqueeze(0)
    return batch[:, :3, :, :].to(device)


def _weighted_scores(pred_chunks, target_chunks):
    """Return (accuracy, F1), both with average='weighted', over all collected samples.

    NOTE(review): the log lines below label the accuracy "Balanced", but
    average='weighted' weights each class by its support; confirm whether
    macro averaging was intended.
    """
    preds = torch.cat(pred_chunks)
    targets = torch.cat(target_chunks)
    acc = accuracy(preds=preds, target=targets, task="multiclass", num_classes=13, average='weighted')
    f1 = f1_score(preds=preds, target=targets, task="multiclass", num_classes=13, average='weighted')
    return acc, f1


# Set the model to training mode
vgg16_softmax.train()

train_epoch = 0  # number of training iterations completed so far
val_epoch = 0    # number of validation passes completed so far

n = 50 # Compute validation loss every n iterations

train_acc = 0
train_F1 = 0
loss = 0

# Predictions / labels of every training sample seen so far. The metrics are
# computed over all of them; a metric of one sample at a time is meaningless.
train_preds, train_targets = [], []

# Loop over all instances in train_data
for i in range(len(train_data)):
    example1 = _prepare_example(train_data[i]["Image Vector"])

    # Forward pass
    output = vgg16_softmax(example1)

    # True label as a one-element tensor on the same device as the input
    y_true_t = torch.tensor([train_data[i]["Piece Label"]]).to(device)

    loss = fLoss_train(output, y_true_t)

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    _, y_pred_t = torch.max(output, 1) # Get the predicted class label
    train_preds.append(y_pred_t.detach())
    train_targets.append(y_true_t)

    if train_epoch % 25 == 24:
        # Running metrics over every training sample processed so far
        train_acc, train_F1 = _weighted_scores(train_preds, train_targets)
        print(f"Train Iteration: {i+1}, Predicted class: {y_pred_t.item()}, True class: {y_true_t.item()}, Train Loss:  {loss:.4f}, Balanced Train Accuracy: {train_acc:.2%}, , Weighted F1: {train_F1:.2}")

    # Compute validation loss and accuracy every n iterations
    if i % n == n-1:
        # Set the model to evaluation mode
        vgg16_softmax.eval()

        val_loss = 0.0  # sum of the per-sample validation losses
        val_acc = 0.0
        val_F1 = 0.0
        val_preds, val_targets = [], []

        with torch.no_grad():
            for j in range(len(val_data)):
                example2 = _prepare_example(val_data[j]["Image Vector"])

                # Forward pass
                output = vgg16_softmax(example2)

                y_true_v = torch.tensor([val_data[j]["Piece Label"]]).to(device)

                # Accumulate the loss instead of keeping only the last sample's value
                val_loss += fLoss_val(output, y_true_v).item()
                _, y_pred_v = torch.max(output, 1) # Get the predicted class label

                val_preds.append(y_pred_v)
                val_targets.append(y_true_v)

        # Metrics over the complete validation set (skipped when it is empty)
        if val_preds:
            val_acc, val_F1 = _weighted_scores(val_preds, val_targets)

        # Print validation loss, accuracy and F1; max(..., 1) avoids a division
        # by zero for an empty validation set.
        val_epoch += 1
        print(f"Val Epoch: {val_epoch}, Scaled Val Loss:  {val_loss/max(len(val_data), 1):.4f}, Balanced Val Acc: {val_acc:.2%}, Weighted F1: {val_F1:.2}")

        # Set the model back to training mode
        vgg16_softmax.train()

    train_epoch += 1

# Leave the final training metrics in `train_acc` / `train_F1` for later cells.
if train_preds:
    train_acc, train_F1 = _weighted_scores(train_preds, train_targets)