We will continue with this approach (and extend it to YOLOv8) if sufficient data for the non-threat class is acquired.

In [None]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
from torchsummary import summary
from sklearn.metrics import classification_report, confusion_matrix

In [4]:
# Run-wide settings shared by the cells below.
img_size = (256, 256)  # target size handed to transforms.Resize
batch_size = 32
epochs = 10

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Preprocessing pipelines.
#
# ToTensor already converts a PIL image to a float tensor scaled to [0, 1],
# so no further rescaling is applied. A Normalize(mean=0, std=1/255) step after
# ToTensor would divide by 1/255 and push the values back up to [0, 255].

# Training pipeline: resize, light random augmentation, then tensor conversion.
train_transforms = transforms.Compose([
    transforms.Resize(img_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(0.1 * 180),  # rotate by up to +/-18 degrees
    transforms.RandomAffine(0, scale=(0.9, 1.1)),  # no rotation, zoom only
    transforms.ColorJitter(contrast=0.1),
    transforms.ToTensor(),
])

# Validation/test pipeline: deterministic, no augmentation.
val_test_transforms = transforms.Compose([
    transforms.Resize(img_size),
    transforms.ToTensor(),
])

In [None]:
# One ImageFolder per split; only the training split uses the augmenting
# transform pipeline.
train_ds = ImageFolder(root="split_data/train", transform=train_transforms)
val_ds = ImageFolder(root="split_data/val", transform=val_test_transforms)
test_ds = ImageFolder(root="split_data/test", transform=val_test_transforms)

# Training batches are reshuffled every epoch; the evaluation splits keep a
# fixed order.
train_loader = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(dataset=split, batch_size=batch_size, shuffle=False)
    for split in (val_ds, test_ds)
)

In [None]:
class Model(nn.Module):
    """Small CNN binary classifier that emits one raw logit per image.

    Four 3x3 convolution + batch-norm + ReLU stages widen the channels
    3 -> 32 -> 64 -> 128 -> 256. Each of the first three stages is followed by
    2x2 max-pooling and 25% dropout. Global average pooling then collapses the
    spatial dimensions, and a 256 -> 128 -> 1 fully connected head (with 50%
    dropout on the hidden layer) produces the output. No sigmoid is applied.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=32)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(num_features=64)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(num_features=128)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(num_features=256)

        # Parameter-free layers reused across stages, and the classifier head.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.drop25 = nn.Dropout(p=0.25)
        self.gap = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.fc1 = nn.Linear(in_features=256, out_features=128)
        self.drop50 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=128, out_features=1)

    def forward(self, x):
        """Map a batch of 3-channel images ``(N, 3, H, W)`` to logits ``(N, 1)``."""
        downsampling_stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, bn in downsampling_stages:
            x = F.relu(bn(conv(x)))
            x = self.pool(x)
            x = self.drop25(x)

        # The last conv stage is not pooled; global average pooling follows.
        x = F.relu(self.bn4(self.conv4(x)))
        pooled = self.gap(x)
        features = pooled.flatten(1)  # (N, 256, 1, 1) -> (N, 256)

        hidden = F.relu(self.fc1(features))
        hidden = self.drop50(hidden)
        return self.fc2(hidden)

# Instantiate the network on the selected device.
model = Model().to(device)
# Print a per-layer summary. The dummy input shape is derived from img_size so
# it always matches the resolution produced by the transforms; 3 is the number
# of input channels expected by the first convolution.
summary(model, (3, *img_size))

In [None]:
# Binary cross-entropy computed directly on raw logits (the sigmoid is folded
# into the loss), which is safer than BCELoss on explicit sigmoid outputs.
criterion = nn.BCEWithLogitsLoss()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given, the model is put in training mode and its
    weights are updated after every batch. Otherwise the model is put in
    evaluation mode and gradient tracking is disabled.

    Raises:
        ZeroDivisionError: if ``loader`` yields no samples.
    """
    training = optimizer is not None
    model.train(training)
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images = images.to(device)
            # The loss needs float targets shaped like the (N, 1) logits.
            labels = labels.to(device).float().unsqueeze(1)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # Assumes criterion returns the batch mean (the default reduction):
            # weight it by the batch size so the epoch average stays exact even
            # when the last batch is smaller.
            loss_sum += loss.item() * images.size(0)
            preds = (torch.sigmoid(outputs) > 0.5).float()
            n_correct += (preds == labels).sum().item()
            n_seen += labels.size(0)

    return loss_sum / n_seen, n_correct / n_seen


for epoch in range(epochs):
    # One optimisation pass over the training split, then a gradient-free
    # pass over the validation split.
    train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
    val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

    print(f"Epoch [{epoch+1}/{epochs}] "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

In [None]:
# Evaluate the trained model on the held-out test split.
model.eval()
preds, true_labels = [], []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model(images)
        # The model emits raw logits. Convert them to probabilities so that the
        # 0.5 threshold below is the same decision rule used for the training
        # and validation accuracy (sigmoid(outputs) > 0.5).
        preds.append(torch.sigmoid(outputs).cpu().numpy())
        true_labels.append(labels.numpy())

preds = np.concatenate(preds, axis=0)  # predicted probabilities, one row per image
pred_labels = (preds > 0.5).astype("int32").flatten()
true_labels = np.concatenate(true_labels, axis=0)

# Pin the label set so the report and matrix always cover both classes, even
# if one of them is absent from the test split or from the predictions.
# NOTE(review): the display names assume class index 0 is the non-threat
# folder and index 1 the threat folder -- confirm against test_ds.class_to_idx.
class_ids = [0, 1]
print(classification_report(true_labels, pred_labels, labels=class_ids,
                            target_names=["No Threat", "Threat"]))
cm = confusion_matrix(true_labels, pred_labels, labels=class_ids)
print(cm)