In [7]:
import random
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from datasets import load_dataset


In [2]:
# How many randomly chosen images the preview cell should render
NUM_IMAGES = 5

# Which split to preview: 'train' or 'test'
SUBSET = 'train'

In [None]:
# Load every available split of the dataset, keyed by split name
dataset_dict = load_dataset('PrzemekS/highway-vehicles')

# Keep a handle on the split chosen via SUBSET for the preview
my_dataset = dataset_dict[SUBSET]

In [None]:
# Define class names (ensure this matches your dataset); list index == category ID
class_names = ['vehicle', 'truck']
id2label = {class_id: name for class_id, name in enumerate(class_names)}

# Outline/text color per category ID; any other ID falls back to default_color
class_colors = {0: 'red', 1: 'green'}
default_color = 'blue'

# Randomly select indices. random.sample raises ValueError when asked for more
# items than the population holds, so cap the request at the split size.
num_to_show = min(NUM_IMAGES, len(my_dataset))
indices = random.sample(range(len(my_dataset)), num_to_show)

for idx in indices:
    example = my_dataset[idx]
    image = example['image']
    objects = example['objects']
    bboxes = objects['bbox']        # List of bounding boxes
    labels = objects['category']    # List of category IDs

    # Convert image to a numpy array for imshow. The shape is deliberately not
    # unpacked: it was unused, and a 3-way unpack fails on 2-D (grayscale) arrays.
    img_np = np.array(image)

    # Create figure and axes
    fig, ax = plt.subplots(1, figsize=(10, 10))
    ax.imshow(img_np)

    # Add bounding boxes
    for bbox, label in zip(bboxes, labels):
        # bbox is [xmin, ymin, width, height] in absolute pixel coordinates
        xmin, ymin, box_width, box_height = bbox[:4]

        # Choose color based on class label (unknown IDs get the default color)
        color = class_colors.get(label, default_color)

        # Draw the unfilled box outline on this figure's axes
        rect = patches.Rectangle(
            (xmin, ymin),
            box_width,
            box_height,
            linewidth=2,
            edgecolor=color,
            facecolor='none',
        )
        ax.add_patch(rect)

        # Add label just above the box. Use .get so a category ID missing from
        # class_names is still drawn instead of raising KeyError.
        label_name = id2label.get(label, f'class {label}')
        ax.text(xmin, ymin - 5, label_name, color=color, fontsize=12, weight='bold')

    ax.axis('off')
    plt.show()
    # Release the figure so repeated runs / large NUM_IMAGES don't accumulate open figures
    plt.close(fig)


### Calculate number of images and objects

In [None]:
class_names = ['vehicle', 'truck']  # Update with your actual class names
id2label = {idx: label for idx, label in enumerate(class_names)}


def _print_class_counts(counts):
    """Print one indented line per category ID, in ascending ID order.

    Args:
        counts: Mapping of category ID -> number of objects with that ID.
    """
    for label_id, count in sorted(counts.items()):
        # Fall back to a placeholder so an ID missing from class_names
        # is still reported instead of raising KeyError.
        class_name = id2label.get(label_id, 'unknown')
        print(f"  Class {label_id} ({class_name}): {count:,}")


# Counters accumulated across all splits
total_images = 0
total_objects = 0
class_counts = defaultdict(int)

# Iterate over each split in the dataset (e.g., 'train' and 'test')
for split, dataset in dataset_dict.items():
    num_images = len(dataset)
    num_objects = 0
    split_class_counts = defaultdict(int)

    # Read only the 'objects' column: this cell never uses the image, and
    # iterating whole rows would load it for every example as well.
    for objects in dataset['objects']:
        num_objects += len(objects['bbox'])

        for label in objects['category']:
            split_class_counts[label] += 1
            class_counts[label] += 1

    total_images += num_images
    total_objects += num_objects

    print(f"Split: {split}")
    print(f"Number of images: {num_images:,}")
    print(f"Number of objects: {num_objects:,}")
    print("Objects per class:")
    _print_class_counts(split_class_counts)
    print()

print(f"Total number of images: {total_images:,}")
print(f"Total number of objects: {total_objects:,}")
print("Total objects per class:")
_print_class_counts(class_counts)