In [None]:
# Move the uploaded Kaggle API token into ~/.kaggle.
# -p keeps this cell re-runnable: plain mkdir errors once the directory exists.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

# Bias in Helmet Detection

In [None]:
# Fetch the helmet-detection dataset archive with the Kaggle CLI.
!kaggle datasets download -d andrewmvd/helmet-detection
# -o overwrites without prompting, so a re-run cannot stall on unzip's
# interactive "replace?" question; -q suppresses the per-file listing.
!unzip -o -q helmet-detection.zip -d helmet_data

In [None]:
import os
import xml.etree.ElementTree as ET
from collections import Counter

# Location of the XML annotation files
annotations_path = 'helmet_data/annotations'

# Running tally: class name -> number of annotated objects
label_counter = Counter()

# Visit each .xml file and count the <name> of every <object> it contains
for label_file in os.listdir(annotations_path):
    if not label_file.endswith('.xml'):
        continue
    root = ET.parse(os.path.join(annotations_path, label_file)).getroot()
    label_counter.update(obj.find('name').text for obj in root.findall('object'))

# Report each class with its absolute count and its share of all objects
total = sum(label_counter.values())
print("✅ Class Counts (Total Across Dataset):")
for cls in label_counter:
    count = label_counter[cls]
    percentage = (count / total) * 100
    print(f"Class '{cls}': {count} samples ({percentage:.2f}%)")


✅ Class Counts (Total Across Dataset):
Class 'Without Helmet': 489 samples (33.70%)
Class 'With Helmet': 962 samples (66.30%)


The dataset is imbalanced: roughly two thirds of the annotated objects are 'With Helmet'. This imbalance can lead to:

- The model learning to predict 'With Helmet' more often, since that is the statistically safer guess

- Lower recall on the 'Without Helmet' class

To counter this, we applied class-aware augmentation.

## Checking average brightness

In [None]:
import cv2
import os

def classify_day_night(image_path, threshold=100):
    """Classify an image as 'day' or 'night' from its average brightness.

    The image is converted to grayscale and its mean pixel value is compared
    with ``threshold``.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.
        threshold: Mean grayscale value that must be exceeded for the image
            to count as "day". Defaults to 100.

    Returns:
        "day" if the mean brightness is strictly greater than ``threshold``,
        otherwise "night".

    Raises:
        ValueError: If the file cannot be read or decoded as an image.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; without
    # this check the problem only surfaces as an opaque cvtColor error.
    if image is None:
        raise ValueError(f"Could not read image: {image_path!r}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    brightness = gray.mean()
    return "day" if brightness > threshold else "night"

# Tally the day/night label of every image file in the dataset folder
folder_path = "helmet_data/images"
day_count = night_count = total = 0

for filename in os.listdir(folder_path):
    # Only files with an image extension are classified
    if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
        continue
    path = os.path.join(folder_path, filename)
    label = classify_day_night(path)
    if label == "day":
        day_count += 1
    else:
        night_count += 1
    total += 1

# Report counts and percentages, or note that nothing was processed
if total == 0:
    print("No valid images found in the folder.")
else:
    day_pct = (day_count / total) * 100
    night_pct = (night_count / total) * 100
    print(f"\nTotal images: {total}")
    print(f"Day: {day_count} images ({day_pct:.2f}%)")
    print(f"Night: {night_count} images ({night_pct:.2f}%)")



Total images: 764
Day: 642 images (84.03%)
Night: 122 images (15.97%)


## Checking crowdedness

In [None]:
pip install ultralytics


In [None]:
from ultralytics import YOLO
import os

# Load pretrained YOLOv8 model (COCO)
model = YOLO("yolov8n.pt")  # or yolov8s.pt, etc.

# Vehicle-related COCO class names (names, not numeric IDs).
# The model labels two-wheelers 'motorcycle' (not 'motorbike'), as its
# inference log shows, so that is the spelling that must appear here.
vehicle_classes = ['car', 'truck', 'bus', 'motorcycle', 'bicycle']

def classify_crowdedness(image_path, threshold=5):
    """Count detected vehicles in an image and label the scene's crowdedness.

    Runs the module-level ``model`` on the image and counts the detections
    whose class name appears in the module-level ``vehicle_classes`` list.

    Args:
        image_path: Image source passed to ``model``.
        threshold: A scene is "crowded" only when the vehicle count is
            strictly greater than this value. Defaults to 5.

    Returns:
        A ``(count, crowdedness)`` tuple, where ``crowdedness`` is either
        "crowded" or "not_crowded".
    """
    # verbose=False silences the per-image inference log, which otherwise
    # buries the caller's own output when looping over a whole folder.
    results = model(image_path, verbose=False)
    detections = results[0]
    names = detections.names

    # One class id per detected box; map each id to its name and keep vehicles.
    class_ids = detections.boxes.cls.cpu().numpy().astype(int)
    count = sum(1 for cls_id in class_ids if names[cls_id] in vehicle_classes)

    crowdedness = "crowded" if count > threshold else "not_crowded"
    return count, crowdedness

# Label every image in the dataset folder and print its vehicle count
folder_path = "helmet_data/images"
for filename in os.listdir(folder_path):
    # Skip anything that does not carry an image extension
    if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
        continue
    path = os.path.join(folder_path, filename)
    count, label = classify_crowdedness(path)
    print(f"{filename}: {label} ({count} vehicles)")



image 1/1 /content/helmet_data/images/BikesHelmets588.png: 416x640 4 persons, 1 car, 1 motorcycle, 340.1ms
Speed: 18.8ms preprocess, 340.1ms inference, 37.7ms postprocess per image at shape (1, 3, 416, 640)
BikesHelmets588.png: not_crowded (1 vehicles)

image 1/1 /content/helmet_data/images/BikesHelmets71.png: 384x640 2 persons, 2 motorcycles, 144.6ms
Speed: 2.4ms preprocess, 144.6ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)
BikesHelmets71.png: not_crowded (0 vehicles)

image 1/1 /content/helmet_data/images/BikesHelmets527.png: 352x640 5 persons, 2 motorcycles, 127.3ms
Speed: 2.2ms preprocess, 127.3ms inference, 1.4ms postprocess per image at shape (1, 3, 352, 640)
BikesHelmets527.png: not_crowded (0 vehicles)

image 1/1 /content/helmet_data/images/BikesHelmets145.png: 320x640 6 persons, 4 bicycles, 119.4ms
Speed: 1.9ms preprocess, 119.4ms inference, 1.2ms postprocess per image at shape (1, 3, 320, 640)
BikesHelmets145.png: not_crowded (4 vehicles)

image 1/1 /c

KeyboardInterrupt: 

In [None]:
from ultralytics import YOLO
import os

# Load pretrained YOLOv8 model (COCO)
model = YOLO("yolov8n.pt")  # or yolov8s.pt, etc.

# Vehicle-related COCO class labels.
# The model labels two-wheelers 'motorcycle' (not 'motorbike'), as its
# inference log shows, so that is the spelling that must appear here.
vehicle_classes = ['car', 'truck', 'bus', 'motorcycle', 'bicycle']

def count_vehicles(image_path):
    """Return the number of vehicles detected in one image.

    Runs the module-level ``model`` on the image and counts the detections
    whose class name appears in the module-level ``vehicle_classes`` list.

    Args:
        image_path: Image source passed to ``model``.

    Returns:
        The number of detected boxes whose class name is a vehicle class.
    """
    # verbose=False silences the per-image inference log; across a whole
    # folder it would otherwise drown out the final summary.
    results = model(image_path, verbose=False)
    detections = results[0]
    names = detections.names

    # One class id per detected box; map each id to its name and keep vehicles.
    class_ids = detections.boxes.cls.cpu().numpy().astype(int)
    return sum(1 for cls_id in class_ids if names[cls_id] in vehicle_classes)

# Dataset folder and running totals
folder_path = "helmet_data/images"
total_count = image_count = 0

# Accumulate the vehicle detections of every image file
for filename in os.listdir(folder_path):
    if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
        continue
    path = os.path.join(folder_path, filename)
    vehicle_count = count_vehicles(path)
    total_count += vehicle_count
    image_count += 1

# Summary: totals and the per-image average, or a note that nothing was processed
if image_count == 0:
    print("No valid images found in the folder.")
else:
    average = total_count / image_count
    print(f"\nProcessed {image_count} images")
    print(f"Total vehicles detected: {total_count}")
    print(f"Average vehicles per image: {average:.2f}")



image 1/1 /content/helmet_data/images/BikesHelmets588.png: 416x640 4 persons, 1 car, 1 motorcycle, 140.3ms
Speed: 3.5ms preprocess, 140.3ms inference, 1.4ms postprocess per image at shape (1, 3, 416, 640)

image 1/1 /content/helmet_data/images/BikesHelmets71.png: 384x640 2 persons, 2 motorcycles, 146.6ms
Speed: 2.7ms preprocess, 146.6ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 /content/helmet_data/images/BikesHelmets527.png: 352x640 5 persons, 2 motorcycles, 138.7ms
Speed: 2.2ms preprocess, 138.7ms inference, 1.7ms postprocess per image at shape (1, 3, 352, 640)

image 1/1 /content/helmet_data/images/BikesHelmets145.png: 320x640 6 persons, 4 bicycles, 147.5ms
Speed: 3.0ms preprocess, 147.5ms inference, 1.4ms postprocess per image at shape (1, 3, 320, 640)

image 1/1 /content/helmet_data/images/BikesHelmets733.png: 448x640 1 person, 6 motorcycles, 150.8ms
Speed: 2.6ms preprocess, 150.8ms inference, 1.3ms postprocess per image at shape (1, 3, 448, 640)