Possible improvement: normalize the dataset (subtract the mean and divide by the standard deviation).
Add a learning rate scheduler?

### Imports

In [1]:
import os
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets
from torchvision.io import read_image, ImageReadMode
from torchvision.transforms.functional import convert_image_dtype
import lightning as L
from lightning.pytorch.loggers import WandbLogger
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
import matplotlib.pyplot as plt
import random
from pathlib import Path

# Report which compute device is available: CUDA when present, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


### Data

In [2]:
# Create dataset class
class DroneImagesDataset(Dataset):
    """Image dataset driven by a CSV annotation file.

    The CSV is read with one header row skipped. In every remaining row,
    column 0 is the image path and columns 1-3 are numeric values that are
    returned, in order, as the ``[left, forward, right]`` label tensor.

    Args:
        csv_file: Path to the annotation CSV.
        transform: Optional callable applied to the loaded image tensor.
    """

    def __init__(self, csv_file, transform=None):
        rows = np.genfromtxt(csv_file, delimiter=',', dtype=None, encoding=None, skip_header=True)
        # genfromtxt collapses a file with a single data row into a 0-d array,
        # which supports neither len() nor integer indexing. Promote it to 1-D
        # so one-row and many-row files behave the same way.
        self.annotations = np.atleast_1d(rows)
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, y_label)`` for the annotation row at ``index``."""
        row = self.annotations[index]

        # Path() normalizes the separator style for the current platform.
        img_path = str(Path(row[0]))
        image = convert_image_dtype(read_image(img_path), torch.float)

        left, forward, right = float(row[1]), float(row[2]), float(row[3])
        y_label = torch.tensor([left, forward, right])

        if self.transform:
            image = self.transform(image)

        return (image, y_label)

    def __len__(self):
        """Number of annotation rows (header excluded)."""
        return len(self.annotations)

In [3]:
# to try: bilinear, bicubic or nearest exact
# Preprocessing pipeline applied to every loaded image tensor:
#   1. keep the central 520x120 (height x width) region,
#   2. collapse to a single grayscale channel,
#   3. downsample to 130x60 with nearest-exact interpolation.
IMAGE_TRANSFORM = transforms.Compose(
    [
        transforms.CenterCrop(size=(520, 120)),
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize(
            size=(130, 60),
            interpolation=transforms.InterpolationMode.NEAREST_EXACT,
        ),
    ]
)

In [4]:
# Split proportions, batch size and the seed that fixes the split membership.
val_ratio = 0.2
test_ratio = 0.1
batch_size = 32
split_seed = 42
dataset = DroneImagesDataset(csv_file='labeled_images.csv', transform=IMAGE_TRANSFORM)

# Split the dataset into training, validation, and test sets.
# The training set absorbs the rounding remainder so the three sizes always
# sum to len(dataset).
num_samples = len(dataset)
num_val_samples = int(val_ratio * num_samples)
num_test_samples = int(test_ratio * num_samples)
num_train_samples = num_samples - num_val_samples - num_test_samples

# A dedicated, seeded generator keeps the split identical across kernel
# restarts; without it the held-out test set changes on every run and the
# reported metrics are not comparable between runs.
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [num_train_samples, num_val_samples, num_test_samples],
    generator=torch.Generator().manual_seed(split_seed),
)

# Create DataLoaders for the training, validation, and test sets.
# Only the training loader shuffles; all other options are shared.
loader_kwargs = dict(batch_size=batch_size, num_workers=4, prefetch_factor=2, persistent_workers=True)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

### Lightning module

In [5]:
class LightningCNN(L.LightningModule):
    """Three-stage convolutional classifier producing 3 logits per image.

    Each stage is Conv2d(5x5, padding 1) -> ReLU -> MaxPool2d(2x2) -> Dropout,
    followed by a flatten and a single linear layer.

    Args:
        cfg: Hyperparameter mapping stored through ``save_hyperparameters``.
            The module reads ``learning_rate`` and ``label_smoothing`` from
            it via ``self.hparams``.
    """

    def __init__(self, cfg):
        super().__init__()
        self.model = torch.nn.Sequential(
            # Convolutional layer 1
            torch.nn.Conv2d(1, 6, kernel_size=(5,5), stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=(2,2), stride=2),
            torch.nn.Dropout(0.2),
            # Convolutional layer 2
            torch.nn.Conv2d(6, 5, kernel_size=(5,5), stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=(2,2), stride=2),
            torch.nn.Dropout(0.1),
            # Convolutional layer 3
            torch.nn.Conv2d(5, 5, kernel_size=(5,5), stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=(2,2), stride=2),
            torch.nn.Dropout(0.1),
            # Fully connected layer
            torch.nn.Flatten(),
            # 350 = 5 channels * 14 * 5, the feature-map size the three stages
            # above produce for a 1x130x60 input.
            torch.nn.Linear(350, 3)
        )

        self.save_hyperparameters(cfg)

    def forward(self, x):
        """Return the raw logits for a batch of images."""
        return self.model(x)

    def _shared_step(self, batch, stage, label_smoothing=0.0):
        """Compute and log loss and accuracy for one batch.

        Args:
            batch: ``(x, y)`` pair; ``y`` is compared through its argmax along
                dim 1 and is also passed to ``cross_entropy`` as the target.
            stage: Prefix for the logged metric names (``train``/``val``/``test``).
            label_smoothing: Forwarded to ``cross_entropy``.

        Returns:
            The cross-entropy loss tensor.
        """
        x, y = batch
        y_hat = self.model(x)
        loss = torch.nn.functional.cross_entropy(y_hat, y, label_smoothing=label_smoothing)
        acc = torch.sum(torch.argmax(y_hat, dim=1) == torch.argmax(y, dim=1)) / len(y)
        self.log(f"{stage}/loss", loss)
        self.log(f"{stage}/acc", acc)
        return loss

    def training_step(self, batch, batch_idx):
        """Training loss with label smoothing taken from the hyperparameters.

        The loss is only returned: under automatic optimization Lightning runs
        the backward pass and optimizer step itself, so calling
        ``loss.backward()`` here would just repeat that work.
        """
        return self._shared_step(batch, "train", label_smoothing=self.hparams.label_smoothing)

    def validation_step(self, batch, batch_idx):
        """Validation loss/accuracy (no label smoothing)."""
        return self._shared_step(batch, "val")

    def test_step(self, batch, batch_idx):
        """Test loss/accuracy (no label smoothing)."""
        return self._shared_step(batch, "test")

    def configure_optimizers(self):
        """Adam over all parameters at the configured learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)

In [6]:
# Weights & Biases logger for this project.
wand_blogger = WandbLogger(project="MAV-CNN-Project")

# Run hyperparameters.
num_epochs = 20
label_smoothing = 0.1
learning_rate = 0.0005

# Run identifier used for the log directory; dots are replaced so the name
# contains no '.' characters.
model_name = f"CNN-e{num_epochs}-ls{label_smoothing}-lr{learning_rate}".replace(".", "_")

# Configuration handed to the Lightning module (stored as its hparams).
cfg = {
    "architecture": "CNN",
    "learning_rate": learning_rate,
    "epochs": num_epochs,
    "label_smoothing": label_smoothing,
}

# Stop when validation accuracy has not improved by at least 0.005 for 5 checks.
early_stop_callback = EarlyStopping(
    monitor="val/acc",
    min_delta=0.005,
    patience=5,
    verbose=False,
    mode="max",
)
trainer = L.Trainer(
    max_epochs=cfg["epochs"],
    logger=wand_blogger,
    default_root_dir=f"lightning_logs/{model_name}",
    callbacks=[early_stop_callback],
)
model = LightningCNN(cfg)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


### Train

In [7]:
# Train on the training loader, validating on the validation loader.
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mtimdb[0m. Use [1m`wandb login --relogin`[0m to force relogin



  | Name  | Type       | Params
-------------------------------------
0 | model | Sequential | 2.6 K 
-------------------------------------
2.6 K     Trainable params
0         Non-trainable params
2.6 K     Total params
0.010     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


### Save to ONNX

In [8]:
# Save model to onnx
# Export is opt-in: set the flag to True to write "<model_name>.onnx".
EXPORT_ONNX = False
if EXPORT_ONNX:
    model.eval()
    # Single example input of shape (1, 1, 130, 60) used to trace the model.
    dummy_input = torch.randn(1, 1, 130, 60)
    torch.onnx.export(model, dummy_input, f"{model_name}.onnx")

In [9]:
# Evaluate the model on the held-out test loader and keep the returned results.
test = trainer.test(model, dataloaders=test_loader)

Testing: |          | 0/? [00:00<?, ?it/s]