In [1]:
import os
import cv2
import torch
import numpy as np
import xml.etree.ElementTree as ET

from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from tqdm import tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


In [3]:
def parse_jaad_xml(xml_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    
    tracks = []
    for track in root.findall("track"):
        if track.attrib["label"] != "ped":
            continue
        
        frames = []
        for box in track.findall("box"):
            frames.append({
                "frame": int(box.attrib["frame"]),
                "bbox": [
                    float(box.attrib["xtl"]),
                    float(box.attrib["ytl"]),
                    float(box.attrib["xbr"]),
                    float(box.attrib["ybr"]),
                ]
            })
        tracks.append(frames)
    return tracks


In [4]:
class JAADDataset(Dataset):
    def __init__(self, root, video_ids, transform=None):
        self.root = root
        self.transform = transform
        self.samples = []

        for vid in video_ids:
            xml = os.path.join(root, "annotations", f"{vid}.xml")
            img_dir = os.path.join(root, "images", vid)

            tracks = parse_jaad_xml(xml)

            for track in tracks:
                frames = []
                for f in track:
                    img_path = os.path.join(img_dir, f"{f['frame']:05d}.png")
                    if os.path.exists(img_path):
                        frames.append((img_path, f["bbox"]))
                
                if len(frames) > 0:
                    self.samples.append(frames)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        frames = self.samples[idx]
        imgs = []

        for img_path, bbox in frames:
            img = cv2.imread(img_path)
            x1,y1,x2,y2 = map(int, bbox)
            crop = img[y1:y2, x1:x2]

            if self.transform:
                crop = self.transform(crop)

            imgs.append(crop)

        imgs = torch.stack(imgs)  
        label = torch.tensor(1)   
        return imgs, label


In [5]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406],
                         std=[0.229,0.224,0.225])
])


In [9]:
from pathlib import Path

ROOT = Path(r"")

IMG_DIR = ROOT / "images"
ANN_DIR = ROOT / "annotations"
SPLIT_DIR = ROOT / "split_ids" / "all_videos"

print("Images:", IMG_DIR.exists())
print("Annotations:", ANN_DIR.exists())
print("Splits:", SPLIT_DIR.exists())
print("Split files:", list(SPLIT_DIR.iterdir()))


Images: False
Annotations: False
Splits: False


FileNotFoundError: [Errno 2] No such file or directory: 'split_ids/all_videos'