In [9]:
import os
import pandas as pd
import torch
from torchvision.io import decode_image
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import kagglehub
from pathlib import Path




# Bind the name `etree` to lxml when it is installed, otherwise to the
# standard-library ElementTree, so later cells can parse XML through one alias.
# The print records which implementation this kernel ended up with.
try:
    from lxml import etree
    print("running with lxml.etree")
except ImportError:
    import xml.etree.ElementTree as etree
    print("running with Python's xml.etree.ElementTree")

# Dataset
# https://www.kaggle.com/datasets/karthika95/pedestrian-detection

# Download the latest version. This call is what defines `path`; with it
# commented out the print below fails with NameError on a fresh kernel.
# NOTE(review): kagglehub is expected to reuse its local cache when the dataset
# has already been downloaded — confirm if offline re-runs matter.
path = kagglehub.dataset_download("karthika95/pedestrian-detection")

print("Path to dataset files:", path)



running with lxml.etree
Path to dataset files: /Users/Dylan/.cache/kagglehub/datasets/karthika95/pedestrian-detection/versions/1


In [80]:
class Label:
    """Annotation record for one image.

    Holds the image file name, the image width and height exactly as passed
    in, a two-corner bounding box and the class tag.
    """

    def __init__(self, filename, width, height, x1, y1, x2, y2, label_tag):
        """Store the annotation fields; the box is kept as two (x, y) tuples."""
        first_corner = (x1, y1)
        second_corner = (x2, y2)

        self.filename = filename
        self.width, self.height = width, height
        self.points = [first_corner, second_corner]
        self.label = label_tag

    def __repr__(self):
        """Return a multi-line, human-readable summary of the annotation."""
        first_corner, second_corner = self.points
        return (
            f"Name: {self.filename}\n"
            f"Label:{self.label}\n"
            f"Width: {self.width}\n"
            f"Height: {self.height}\n"
            f"Bnd Box: {first_corner},{second_corner}"
        )
        

class MyDataset(Dataset):
    """Dataset pairing each annotation XML file with its JPEG image.

    Every regular file in the annotations directory is parsed as XML. The
    image size, the first ``<object>``'s bounding box and that object's class
    name are wrapped in a ``Label``. Samples are ``(image, Label)`` tuples.

    Args:
        annotations_file: Path of the directory that holds the annotation XML
            files (a directory, despite the parameter name).
        img_dir: Directory holding the ``.jpg`` images, named after the
            annotation files.
        transform: Stored on the instance; not applied by ``__getitem__``.
        target_transform: Stored on the instance; not applied by
            ``__getitem__``.
    """

    # a dataset has to implement these 3 methods
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = self.get_labels(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of annotations that were parsed successfully."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is read from ``img_dir`` with ``decode_image``; the label is
        the ``Label`` object built by ``get_labels``.
        """
        label = self.img_labels[idx]
        img_path = os.path.join(self.img_dir, label.filename)
        image = decode_image(img_path)
        # NOTE(review): transform / target_transform are stored but left
        # unapplied, matching the previous behaviour (the calls were commented
        # out). Confirm the configured transforms accept a decoded tensor and a
        # Label object before enabling them.
        return image, label

    def get_labels(self, annotations_file):
        """Parse every annotation file in a directory into ``Label`` objects.

        Args:
            annotations_file: Directory containing the annotation XML files.

        Returns:
            A list of ``Label`` objects ordered by annotation file path. Width
            and height keep the text found in the XML; the box coordinates are
            converted to ``float``.

        Files that cannot be read or are not well-formed XML are reported with
        a printed message and skipped. An annotation that parses but lacks one
        of the expected elements still raises ``AttributeError``.
        """
        labels = []

        # iterdir() yields entries in arbitrary order; sorting keeps the
        # index -> sample mapping stable between runs and machines.
        for item in sorted(Path(annotations_file).iterdir()):
            if not item.is_file():
                continue

            try:
                root = etree.parse(str(item)).getroot()

                # get width x height
                size = root.find("size")
                width = size.find("width").text
                height = size.find("height").text

                # Only the first <object> element of the file is read.
                first_object = root.find("object")
                bnd_box = first_object.find("bndbox")
                x_min, y_min, x_max, y_max = (
                    float(bnd_box.find(tag).text)
                    for tag in ("xmin", "ymin", "xmax", "ymax")
                )
                label_tag = first_object.find("name").text

                # Path.stem removes just the final suffix. str.strip(".xml")
                # would instead strip any leading/trailing '.', 'x', 'm' or 'l'
                # characters and corrupt names such as "lxml_demo.xml".
                image_name = item.stem + ".jpg"

                labels.append(
                    Label(image_name, width, height, x_min, y_min, x_max, y_max, label_tag)
                )

            # ParseError exists in both lxml.etree (where XMLSyntaxError is a
            # subclass of it) and xml.etree.ElementTree, so this also works
            # when the standard-library fallback is in use.
            except etree.ParseError as e:
                print(f"XML parsing error: {e}")
            except IOError as e:
                print(f"File error: {e}")

        return labels






In [82]:
# Build the training split from its annotation directory and image directory.
train_annotations_dir = "/Users/Dylan/Documents/ml/pedestrian_tracking/dataset/Train/Annotations"
train_img_dir = "/Users/Dylan/Documents/ml/pedestrian_tracking/dataset/Train/JPEGImages"
training_dataset = MyDataset(
    annotations_file=train_annotations_dir,
    img_dir=train_img_dir,
    transform=ToTensor(),
    target_transform=ToTensor(),
)

In [83]:
# instantiate val dataset
# Uses val_* names so the training split built in the previous cell
# (train_annotations_dir / train_img_dir / training_dataset) is not overwritten.
val_annotations_dir = "/Users/Dylan/Documents/ml/pedestrian_tracking/dataset/Val/Annotations"
val_img_dir = "/Users/Dylan/Documents/ml/pedestrian_tracking/dataset/Val/JPEGImages"
val_dataset = MyDataset(val_annotations_dir, val_img_dir, transform=ToTensor(), target_transform=ToTensor())

In [52]:
# `labels` is only a local inside MyDataset.get_labels and is never defined at
# notebook scope, so count the parsed samples through the dataset object.
# NOTE(review): confirm this is the dataset whose size was meant to be checked.
len(training_dataset)

944