In [14]:

import os, cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Dataset locations, expressed relative to the notebook's working directory.
DATA_ROOT = Path("..") / "data" / "raw" / "DFL Bundesliga Data Shootout"

# Each split lives in its own sub-directory of the dataset root.
TRAIN_DIR = DATA_ROOT.joinpath("train")
TEST_DIR = DATA_ROOT.joinpath("test")

In [17]:
def index_videos(root: Path, split: str):
    """Collect one record per ``.mp4`` file found anywhere beneath ``root``.

    Parameters
    ----------
    root : Path
        Directory that is searched recursively for ``*.mp4`` files.
    split : str
        Label copied verbatim into the ``"split"`` field of every record.

    Returns
    -------
    list[dict]
        One dict per video with the keys ``split``, ``folder`` (name of the
        file's parent directory), ``filename``, ``rel_path`` (path relative
        to the module-level ``DATA_ROOT``) and ``abs_path`` (resolved path).
        Records appear in whatever order ``Path.rglob`` yields them.
    """
    return [
        {
            "split": split,
            "folder": video.parent.name,
            "filename": video.name,
            # Relative to the dataset root, not to ``root`` itself.
            "rel_path": str(video.relative_to(DATA_ROOT)),
            "abs_path": str(video.resolve()),
        }
        for video in root.rglob("*.mp4")
    ]

# Build one index over both splits.
# The column list is pinned explicitly so that an empty scan (e.g. a wrong
# working directory for the relative DATA_ROOT) still yields a well-formed,
# empty frame instead of failing in sort_values with KeyError: 'split'.
INDEX_COLUMNS = ["split", "folder", "filename", "rel_path", "abs_path"]

rows = []
rows += index_videos(TRAIN_DIR, "train")
rows += index_videos(TEST_DIR,  "test")

# NOTE: the filename sort is plain string ordering, so "test (10).mp4"
# comes before "test (2).mp4"; downstream sampling relies on this order.
df_idx = (
    pd.DataFrame(rows, columns=INDEX_COLUMNS)
    .sort_values(["split", "folder", "filename"])
    .reset_index(drop=True)
)
df_idx, df_idx.shape

(     split       folder              filename  \
 0     test         test          test (1).mp4   
 1     test         test         test (10).mp4   
 2     test         test         test (11).mp4   
 3     test         test         test (12).mp4   
 4     test         test         test (13).mp4   
 ..     ...          ...                   ...   
 455  train  E3c993bd2_0  E3c993bd2_0 (79).mp4   
 456  train  E3c993bd2_0   E3c993bd2_0 (8).mp4   
 457  train  E3c993bd2_0  E3c993bd2_0 (80).mp4   
 458  train  E3c993bd2_0  E3c993bd2_0 (81).mp4   
 459  train  E3c993bd2_0   E3c993bd2_0 (9).mp4   
 
                                    rel_path  \
 0                         test\test (1).mp4   
 1                        test\test (10).mp4   
 2                        test\test (11).mp4   
 3                        test\test (12).mp4   
 4                        test\test (13).mp4   
 ..                                      ...   
 455  train\E3c993bd2_0\E3c993bd2_0 (79).mp4   
 456   train\E

In [20]:
def probe(video_path: str):
    """Read basic container metadata for a video through OpenCV.

    Parameters
    ----------
    video_path : str
        Location of the video file. Path-like objects are accepted as well;
        the value is converted with ``str()`` before being handed to OpenCV.

    Returns
    -------
    dict
        ``fps`` (float; 25.0 is substituted when OpenCV reports a falsy
        value such as 0), ``frames`` (int; 0 when the reported count is
        falsy), ``duration_sec`` (``frames / fps``, or ``None`` when ``fps``
        is not positive) and ``resolution`` (``"<width>x<height>"``).
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
        n = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
    finally:
        # Always free the capture handle, even if a property read raises.
        cap.release()
    return {
        "fps": fps,
        "frames": n,
        "duration_sec": (n / fps if fps > 0 else None),
        "resolution": f"{w}x{h}",
    }

# Pick one clip per (split, folder) pair. The seed is fixed per group so the
# same clip is chosen on every run; only the first ten picks are kept.
one_per_folder = df_idx.groupby(["split", "folder"], group_keys=False).apply(
    lambda grp: grp.sample(1, random_state=0)
)
sample_probe = one_per_folder.reset_index(drop=True).head(10)

# Merge each sampled index row with the metadata probed from its file.
probe_rows = [
    {**row.to_dict(), **probe(row.abs_path)}
    for _, row in sample_probe.iterrows()
]
pd.DataFrame(probe_rows)

Unnamed: 0,split,folder,filename,rel_path,abs_path,fps,frames,duration_sec,resolution
0,test,test,test (4).mp4,test\test (4).mp4,E:\football-tracking\data\raw\DFL Bundesliga D...,25.0,750,30.0,1920x1080
1,train,A1606b0e6_0,A1606b0e6_0 (57).mp4,train\A1606b0e6_0\A1606b0e6_0 (57).mp4,E:\football-tracking\data\raw\DFL Bundesliga D...,25.0,749,29.96,1920x1080
2,train,B1606b0e6_1,B1606b0e6_1 (11).mp4,train\B1606b0e6_1\B1606b0e6_1 (11).mp4,E:\football-tracking\data\raw\DFL Bundesliga D...,25.0,750,30.0,1920x1080
3,train,C35bd9041_0,C35bd9041_0 (37).mp4,train\C35bd9041_0\C35bd9041_0 (37).mp4,E:\football-tracking\data\raw\DFL Bundesliga D...,25.0,750,30.0,1920x1080
4,train,D35bd9041_1,D35bd9041_1 (11).mp4,train\D35bd9041_1\D35bd9041_1 (11).mp4,E:\football-tracking\data\raw\DFL Bundesliga D...,25.0,750,30.0,1920x1080
5,train,E3c993bd2_0,E3c993bd2_0 (3).mp4,train\E3c993bd2_0\E3c993bd2_0 (3).mp4,E:\football-tracking\data\raw\DFL Bundesliga D...,25.0,750,30.0,1920x1080
