In [1]:
import os

# Set your dataset directory (update this path based on where your dataset is stored)
DATASET_PATH = "helmet_dataset"

# Define annotation and image paths
ANNOTATIONS_DIR = os.path.join(DATASET_PATH, "annotations")
IMAGES_DIR = os.path.join(DATASET_PATH, "images")

# Check if dataset exists
if os.path.exists(DATASET_PATH):
    print("Dataset found ✅")
else:
    print("Dataset not found ❌. Check your path!")

Dataset found ✅


## Import Dependencies

In [2]:
import pandas as pd
import xml.etree.ElementTree as ET
import xmltodict
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import torch
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim

In [3]:
def parse_voc_xml(xml_file):
    with open(xml_file) as f:
        data = xmltodict.parse(f.read())

    filename = data["annotation"]["filename"]
    objects = []

    for obj in data["annotation"].get("object", []):
        if isinstance(obj, dict):  # Handle multiple objects
            name = obj["name"]
            bbox = obj["bndbox"]
            xmin, ymin, xmax, ymax = int(bbox["xmin"]), int(bbox["ymin"]), int(bbox["xmax"]), int(bbox["ymax"])
            objects.append([filename, name, xmin, ymin, xmax, ymax])
    
    return objects

# Process all XML files
data = []
for xml_file in os.listdir(ANNOTATIONS_DIR):
    if xml_file.endswith(".xml"):
        data.extend(parse_voc_xml(os.path.join(ANNOTATIONS_DIR, xml_file)))

# Convert to DataFrame
df = pd.DataFrame(data, columns=["filename", "class", "xmin", "ymin", "xmax", "ymax"])

# Save to CSV
csv_path = os.path.join(DATASET_PATH, "helmet_annotations.csv")
df.to_csv(csv_path, index=False)

print(f"Annotations converted and saved to {csv_path} ✅")

Annotations converted and saved to helmet_dataset/helmet_annotations.csv ✅


In [4]:
class HelmetDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = Image.open(img_path).convert("RGB")
        label = 1 if self.img_labels.iloc[idx, 1] == "helmet" else 0  # 1 for helmet, 0 for no helmet

        if self.transform:
            image = self.transform(image)

        return image, label

In [5]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize for ConvNeXt
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

In [6]:
# Load dataset
dataset = HelmetDataset(csv_path, IMAGES_DIR, transform=transform)

# Split into train & test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print(f"Dataset Loaded: {len(train_dataset)} train images, {len(test_dataset)} test images ✅")

Dataset Loaded: 19845 train images, 4962 test images ✅


In [7]:
# Load pre-trained ConvNeXt model
model = models.convnext_tiny(pretrained=True)

# Modify the final classification layer (from 1000 classes → 2 classes)
num_ftrs = model.classifier[2].in_features
model.classifier[2] = nn.Linear(num_ftrs, 2)  # 2 output classes (helmet/no helmet)

# Move model to GPU if available
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")  # Use MPS for Apple Silicon
model = model.to(device)

print("ConvNeXt model loaded and modified for helmet detection ✅")

Downloading: "https://download.pytorch.org/models/convnext_tiny-983f1562.pth" to /Users/caephas/.cache/torch/hub/checkpoints/convnext_tiny-983f1562.pth
100.0%


ConvNeXt model loaded and modified for helmet detection ✅


In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [9]:
import time

# Training function
def train_model(model, train_loader, criterion, optimizer, device, num_epochs=10):
    model.train()  # Set model to training mode

    for epoch in range(num_epochs):
        start_time = time.time()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)  # Move to GPU/MPS

            optimizer.zero_grad()  # Clear gradients
            outputs = model(images)  # Forward pass
            loss = criterion(outputs, labels)  # Compute loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)  # Get predictions
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_acc = 100 * correct / total
        end_time = time.time()

        print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%, Time: {end_time-start_time:.2f}s")

    print("Training complete ✅")

# Start training
train_model(model, train_loader, criterion, optimizer, device, num_epochs=10)

Epoch [1/10] - Loss: 0.2636, Accuracy: 90.18%, Time: 1205.87s
Epoch [2/10] - Loss: 0.2040, Accuracy: 91.51%, Time: 1157.84s
Epoch [3/10] - Loss: 0.1854, Accuracy: 91.60%, Time: 1160.85s
Epoch [4/10] - Loss: 0.1750, Accuracy: 91.71%, Time: 1161.94s
Epoch [5/10] - Loss: 0.1656, Accuracy: 91.79%, Time: 1151.21s
Epoch [6/10] - Loss: 0.1615, Accuracy: 91.81%, Time: 1155.89s
Epoch [7/10] - Loss: 0.1593, Accuracy: 92.08%, Time: 1176.61s
Epoch [8/10] - Loss: 0.1551, Accuracy: 91.98%, Time: 2729.15s
Epoch [9/10] - Loss: 0.1554, Accuracy: 92.23%, Time: 1175.75s
Epoch [10/10] - Loss: 0.1510, Accuracy: 92.26%, Time: 1187.71s
Training complete ✅


In [10]:
# Evaluation function
def evaluate_model(model, test_loader, device):
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():  # No gradient updates during testing
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    acc = 100 * correct / total
    print(f"Test Accuracy: {acc:.2f}% ✅")

# Run evaluation
evaluate_model(model, test_loader, device)

Test Accuracy: 91.25% ✅


In [11]:
# Save trained model
torch.save(model.state_dict(), "helmet_detection_convnext.pth")
print("Model saved as helmet_detection_convnext.pth ✅")

Model saved as helmet_detection_convnext.pth ✅
