In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init

import torch.optim as optim
from torchvision.transforms import v2
from torch.utils.data import DataLoader

import copy

import tqdm
from PIL import Image
import numpy as np

from mobilenetv3 import *
from torchsummary import summary


In [28]:
# ================= Model config values =================
learning_rate = 0.001

save_model_name = "MobileNetTinyv1Trained"

In [29]:
mobilenet = mobilenetv3_tiny()

mobilenet.to(torch.device("cuda"))

summary(mobilenet, (3, 85, 85))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 4, 43, 43]             108
       BatchNorm2d-2            [-1, 4, 43, 43]               8
             ReLU6-3            [-1, 4, 43, 43]               0
         h_sigmoid-4            [-1, 4, 43, 43]               0
           h_swish-5            [-1, 4, 43, 43]               0
            Conv2d-6            [-1, 4, 43, 43]              36
       BatchNorm2d-7            [-1, 4, 43, 43]               8
              ReLU-8            [-1, 4, 43, 43]               0
          Identity-9            [-1, 4, 43, 43]               0
           Conv2d-10            [-1, 8, 43, 43]              32
      BatchNorm2d-11            [-1, 8, 43, 43]              16
 InvertedResidual-12            [-1, 8, 43, 43]               0
           Conv2d-13            [-1, 8, 22, 22]              72
      BatchNorm2d-14            [-1, 8,

In [30]:
print(mobilenet)

MobileNetV3(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 4, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): h_swish(
        (sigmoid): h_sigmoid(
          (relu): ReLU6(inplace=True)
        )
      )
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=4, bias=False)
        (1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Identity()
        (4): Conv2d(4, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (5): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(8, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=8, bias=False)
        (1): BatchNo

# Training

In [31]:
import pickle

dataset_output_name = "dataset-full"

datasetpath = "./" + dataset_output_name + ".pkl"

# Load dataset
with open(datasetpath, 'rb') as f:
    dataset1 = pickle.load(f)

print(f"Dataset number of samples: {len(dataset1)}")

Dataset number of samples: 15


In [32]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, images, labels):
        self.images = images
        self.labels = labels

        self.transform = v2.Compose([ 
            v2.PILToTensor(),
            v2.ToDtype(torch.float32, scale=True),
        ]) 

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]

        image.resize((85,85), Image.NEAREST) # Resize to 85x85 from 170x170

        label = self.labels[idx]
        if label:
            label = torch.tensor([1.0], dtype=torch.float32) 
        else:
            label = torch.tensor([0.0], dtype=torch.float32) 

        image = self.transform(image)

        return {'image': image, 'label': label}

In [38]:
from sklearn.model_selection import train_test_split

images = [s['image'] for s in dataset1]
labels = [s['label'] for s in dataset1]

# Assuming images and labels are your data
images_train, images_test, labels_train, labels_test = train_test_split(images, labels, test_size=0.2)


In [39]:
len(labels_train)

12

In [40]:
# Calculate class imbalance factor
# Is used in the loss function to mitigate the class imbalance issues

num_true = sum([x for x in labels_train if x])
class_imbalance_factor = (len(labels_train)-num_true)/num_true

print(f"Class imbalance factor: {class_imbalance_factor}")

Class imbalance factor: 5.0


In [41]:
train_dataset = CustomDataset(images_train, labels_train)
test_dataset = CustomDataset(images_test, labels_test)

In [42]:
def get_num_correct(y_true, y_prob):
    return (y_true == y_prob).sum().item()

def get_accuracy(y_true, y_prob):
    return (y_true == y_prob).sum().item() / y_true.size(0)

In [43]:
from sklearn.metrics import confusion_matrix

# Create a DataLoader for the test set
test_dataloader = DataLoader(test_dataset, batch_size=8, shuffle=True)

def test_model(model):
    # Switch model to evaluation mode
    model.eval()
    model.to(torch.device("cuda"))
    correct = 0
    total = 0
    all_labels = []
    all_predictions = []
    with torch.no_grad():
        for data in test_dataloader:
            images, labels = data['image'], data['label']
            images = images.to(torch.device("cuda"))
            labels = labels.to(torch.device("cuda"))
            outputs = model(images)
            outputs = outputs > 0.5 # output is now 0 or 1
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(outputs.cpu().numpy())
            total += labels.size(0)
            correct += get_num_correct(labels, outputs.data)
    accuracy = 100 * correct / total
    print(f'Accuracy of the network on the test images: {accuracy} %%')

    # Calculate and print the confusion matrix
    cm = confusion_matrix(all_labels, all_predictions)
    print('Confusion Matrix:')
    print(cm)

    return accuracy

In [44]:
def train(model, n_epochs, train_dataloader, optimizer, criterion):

    # Hold the best model
    best_acc = - np.inf   # init to negative infinity
    best_weights = None

    for epoch in range(n_epochs):
        # print(epoch)
        model.train()
        num_batches = 0
        total_loss  = 0
        with tqdm.tqdm(train_dataloader) as bar:
            bar.set_description(f"Epoch {epoch}")
            for data in bar:
                # take a batch
                inputs, labels = data['image'], data['label']
                inputs = inputs.to(torch.device("cuda"))
                labels = labels.to(torch.device("cuda"))
                
                optimizer.zero_grad()

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                
                acc = get_accuracy(labels, outputs.data)

                num_batches += 1
                total_loss += float(loss)

                bar.set_postfix(
                    loss=float(total_loss/num_batches),
                    acc=float(acc)
                )
        # evaluate accuracy at end of each epoch
        
        acc = test_model(model)
        acc = float(acc)
        if acc > best_acc:
            print("saving best acc model")
            best_acc = acc
            best_weights = copy.deepcopy(model.state_dict())

    return best_weights

In [45]:
n_epochs = 30

model = mobilenetv3_tiny()
model.train()
model.to(torch.device("cuda"))

criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(class_imbalance_factor))
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Create a DataLoader from the dataset
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)

best_weights = train(model, n_epochs, train_dataloader, optimizer, criterion)


Epoch 0: 100%|██████████| 2/2 [00:00<00:00, 34.27it/s, acc=0, loss=1.32]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]
saving best acc model


Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 30.75it/s, acc=0, loss=1.25]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 2: 100%|██████████| 2/2 [00:00<00:00, 30.16it/s, acc=0, loss=1.23]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 59.08it/s, acc=0, loss=1.14]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 4: 100%|██████████| 2/2 [00:00<00:00, 44.47it/s, acc=0, loss=1.13]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 39.34it/s, acc=0, loss=1.12]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 6: 100%|██████████| 2/2 [00:00<00:00, 34.53it/s, acc=0, loss=1.18]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 7: 100%|██████████| 2/2 [00:00<00:00, 20.47it/s, acc=0, loss=1.09]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 8: 100%|██████████| 2/2 [00:00<00:00, 42.62it/s, acc=0, loss=1.11]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 26.45it/s, acc=0, loss=1.07]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 10: 100%|██████████| 2/2 [00:00<00:00, 52.23it/s, acc=0.25, loss=1.17]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 11: 100%|██████████| 2/2 [00:00<00:00, 38.20it/s, acc=0.5, loss=0.959]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 12: 100%|██████████| 2/2 [00:00<00:00, 36.76it/s, acc=0.25, loss=0.916]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 13: 100%|██████████| 2/2 [00:00<00:00, 17.80it/s, acc=0.75, loss=0.915]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 14: 100%|██████████| 2/2 [00:00<00:00, 30.38it/s, acc=0.75, loss=0.875]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 15: 100%|██████████| 2/2 [00:00<00:00, 48.65it/s, acc=1, loss=0.865]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 16: 100%|██████████| 2/2 [00:00<00:00, 59.21it/s, acc=0.25, loss=0.824]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 17: 100%|██████████| 2/2 [00:00<00:00, 44.44it/s, acc=0.5, loss=0.819]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 18: 100%|██████████| 2/2 [00:00<00:00, 21.42it/s, acc=0.5, loss=0.819]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 19: 100%|██████████| 2/2 [00:00<00:00, 17.08it/s, acc=1, loss=0.857]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 20: 100%|██████████| 2/2 [00:00<00:00, 18.08it/s, acc=0.75, loss=0.901]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 21: 100%|██████████| 2/2 [00:00<00:00, 63.56it/s, acc=0.75, loss=0.81]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 22: 100%|██████████| 2/2 [00:00<00:00, 60.39it/s, acc=1, loss=0.857]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 23: 100%|██████████| 2/2 [00:00<00:00, 48.18it/s, acc=1, loss=0.857]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 24: 100%|██████████| 2/2 [00:00<00:00, 24.38it/s, acc=0.75, loss=0.803]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 25: 100%|██████████| 2/2 [00:00<00:00, 41.42it/s, acc=0.75, loss=0.835]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 26: 100%|██████████| 2/2 [00:00<00:00, 27.20it/s, acc=1, loss=0.857]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 27: 100%|██████████| 2/2 [00:00<00:00, 36.38it/s, acc=1, loss=0.857]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 28: 100%|██████████| 2/2 [00:00<00:00, 52.34it/s, acc=1, loss=0.857]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 29: 100%|██████████| 2/2 [00:00<00:00, 47.02it/s, acc=0.75, loss=0.835]

Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]





In [46]:
# Continue training for more epochs, optional
n_epochs = 20
best_weights = train(model, n_epochs, train_dataloader, optimizer, criterion)

Epoch 0: 100%|██████████| 2/2 [00:00<00:00, 29.75it/s, acc=1, loss=0.802]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]
saving best acc model


Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 16.69it/s, acc=1, loss=0.857]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 2: 100%|██████████| 2/2 [00:00<00:00, 38.33it/s, acc=1, loss=0.802]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 56.02it/s, acc=1, loss=0.863]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 4: 100%|██████████| 2/2 [00:00<00:00, 54.58it/s, acc=1, loss=0.857]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 5: 100%|██████████| 2/2 [00:00<00:00, 31.29it/s, acc=0.75, loss=1.15]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 6: 100%|██████████| 2/2 [00:00<00:00, 22.38it/s, acc=1, loss=0.857]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 7: 100%|██████████| 2/2 [00:00<00:00, 36.70it/s, acc=1, loss=0.802]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 8: 100%|██████████| 2/2 [00:00<00:00, 27.02it/s, acc=1, loss=0.857]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 9: 100%|██████████| 2/2 [00:00<00:00, 17.78it/s, acc=1, loss=0.857]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 10: 100%|██████████| 2/2 [00:00<00:00, 45.14it/s, acc=1, loss=0.802]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 11: 100%|██████████| 2/2 [00:00<00:00, 20.15it/s, acc=1, loss=0.802]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 12: 100%|██████████| 2/2 [00:00<00:00, 33.62it/s, acc=1, loss=0.857]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 13: 100%|██████████| 2/2 [00:00<00:00, 56.37it/s, acc=1, loss=0.857]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 14: 100%|██████████| 2/2 [00:00<00:00, 43.86it/s, acc=0.75, loss=0.811]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 15: 100%|██████████| 2/2 [00:00<00:00, 43.97it/s, acc=1, loss=0.802]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 16: 100%|██████████| 2/2 [00:00<00:00, 18.92it/s, acc=1, loss=0.802]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 17: 100%|██████████| 2/2 [00:00<00:00, 14.75it/s, acc=1, loss=0.802]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 18: 100%|██████████| 2/2 [00:00<00:00, 35.52it/s, acc=1, loss=0.87]


Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


Epoch 19: 100%|██████████| 2/2 [00:00<00:00, 26.43it/s, acc=1, loss=0.857]

Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]





In [47]:
# # restore model and return best accuracy
model.load_state_dict(best_weights)

<All keys matched successfully>

In [48]:
torch.save(model.state_dict(), save_model_name + ".pt")

In [49]:
test_model(model)

Accuracy of the network on the test images: 33.333333333333336 %%
Confusion Matrix:
[[1 0]
 [2 0]]


33.333333333333336

In [50]:
torch_input_test = torch.randn(3, 3, 85, 85)
torch_input_test = torch_input_test.to(0)
out = model(torch_input_test)
print(out)


tensor([[0.],
        [0.],
        [0.]], device='cuda:0', grad_fn=<DivBackward0>)


In [51]:
torch_input = torch.randn(3, 3, 85, 85)

torch_input = torch_input.to(0)
# Export the model
torch.onnx.export(model,                     # model being run
                  torch_input,               # model input (or a tuple for multiple inputs)
                  save_model_name + ".onnx", # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=10,          # the ONNX version to export the model to
                  do_constant_folding=True)

# Import sanity check

In [52]:
import onnx
import onnxruntime as ort

# Load the ONNX model
model = onnx.load(save_model_name + ".onnx")

# Check that the IR is well formed
onnx.checker.check_model(model)

# Print a human readable representation of the graph
print(onnx.helper.printable_graph(model.graph))

graph main_graph (
  %input.1[FLOAT, 3x3x85x85]
) initializers (
  %features.2.conv.3.fc.0.weight[FLOAT, 8x8]
  %features.2.conv.3.fc.0.bias[FLOAT, 8]
  %features.2.conv.3.fc.2.weight[FLOAT, 8x8]
  %features.2.conv.3.fc.2.bias[FLOAT, 8]
  %features.5.conv.5.fc.0.weight[FLOAT, 24x72]
  %features.5.conv.5.fc.0.bias[FLOAT, 24]
  %features.5.conv.5.fc.2.weight[FLOAT, 72x24]
  %features.5.conv.5.fc.2.bias[FLOAT, 72]
  %features.6.conv.5.fc.0.weight[FLOAT, 32x120]
  %features.6.conv.5.fc.0.bias[FLOAT, 32]
  %features.6.conv.5.fc.2.weight[FLOAT, 120x32]
  %features.6.conv.5.fc.2.bias[FLOAT, 120]
  %classifier.0.weight[FLOAT, 512x120]
  %classifier.0.bias[FLOAT, 512]
  %classifier.3.weight[FLOAT, 1x512]
  %classifier.3.bias[FLOAT, 1]
  %onnx::Conv_278[FLOAT, 4x3x3x3]
  %onnx::Conv_279[FLOAT, 4]
  %onnx::Conv_281[FLOAT, 4x1x3x3]
  %onnx::Conv_282[FLOAT, 4]
  %onnx::Conv_284[FLOAT, 8x4x1x1]
  %onnx::Conv_285[FLOAT, 8]
  %onnx::Conv_287[FLOAT, 8x1x3x3]
  %onnx::Conv_288[FLOAT, 8]
  %onnx::Conv_29

In [53]:
# This output is used to compare the paparazzi c implementation against

# Create an input tensor
input_tensor = np.zeros((3, 3, 85, 85)).astype(np.float32)
input_tensor[0][0][:][:] = 10/255
input_tensor[0][1][:][:] = 20/255
input_tensor[0][2][:][:] = 100/255
print(input_tensor[0][0][0][0])
print(input_tensor[0][1][0][0])
print(input_tensor[0][2][0][0])
# Create an ONNX runtime session
ort_session = ort.InferenceSession(save_model_name + ".onnx")

# Run the model with the input tensor
output = ort_session.run(None, {'input.1': input_tensor})

print(output)

0.039215688
0.078431375
0.39215687
[array([[0.        ],
       [0.79923517],
       [0.79923517]], dtype=float32)]
