In [1]:
import numpy as np
import av
import os

def create_grid_video_from_paths(paths: list, output_path: str, 
                                 fps: int = 8, margin: int = 10):
    """
    Tile several clips into a single grid video and write it to disk.

    When at least 25 paths are given, 25 of them are picked at evenly spaced
    indices and laid out as a 5x5 grid. With fewer paths, all of them are used
    and the grid side becomes ceil(sqrt(len(paths))); unused cells stay white.
    The video has as many frames as the first selected clip.

    Args:
        paths: list of paths to .npy clip files; each file is expected to hold
            a 4-D array laid out as (frames, height, width, channels).
        output_path: path to save the output video; missing parent
            directories are created.
        fps: frames per second for output video
        margin: pixel margin between grid cells

    Raises:
        ValueError: if ``paths`` is empty, or if a selected clip does not
            match the first clip's frame size or has fewer frames than it.
    """
    num_total = len(paths)
    if num_total == 0:
        # Fail with a clear message instead of an IndexError on clips[0].
        raise ValueError("paths is empty: at least one clip is required to build a grid video")

    # Select 25 videos equally distributed across the provided paths
    num_videos = 25  # 5x5 grid
    if num_total < num_videos:
        print(f"Warning: Only {num_total} videos available, using all of them")
        selected_paths = paths
        grid_size = int(np.ceil(np.sqrt(num_total)))
    else:
        indices = np.linspace(0, num_total - 1, num_videos, dtype=int)
        selected_paths = [paths[i] for i in indices]
        grid_size = 5

    # Load all clips
    clips = []
    for path in selected_paths:
        clip = np.load(path)
        if clip.dtype != np.uint8:
            # NOTE(review): values are clipped to [0, 255], not rescaled, so
            # clips stored as floats in [0, 1] would come out almost black.
            # Confirm the on-disk value range.
            clip = np.clip(clip, 0, 255).astype(np.uint8)
        clips.append(clip)

    # The first clip defines frame count and cell size for the whole grid.
    T, H, W, C = clips[0].shape

    # Validate every clip up front so a bad file is reported by name before
    # any output is written, rather than failing halfway through encoding.
    for path, clip in zip(selected_paths, clips):
        if clip.ndim != 4 or clip.shape[1:] != (H, W, C) or clip.shape[0] < T:
            raise ValueError(
                f"Clip {path} has shape {clip.shape}, expected at least {T} frames "
                f"of shape {(H, W, C)}"
            )

    # Calculate output dimensions with margins
    output_h = grid_size * H + (grid_size - 1) * margin
    output_w = grid_size * W + (grid_size - 1) * margin
    # libx264 with yuv420p needs even width and height; pad odd sizes by one
    # (white) pixel so e.g. an odd margin does not make the encoder fail.
    output_h += output_h % 2
    output_w += output_w % 2

    # Create the output directory only when the path actually has one;
    # os.makedirs("") raises for a bare filename.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # The context manager closes the container even if encoding raises.
    with av.open(output_path, mode='w') as container:
        stream = container.add_stream('libx264', rate=fps)
        stream.width = output_w
        stream.height = output_h
        stream.pix_fmt = 'yuv420p'

        for frame_idx in range(T):
            # Fresh white canvas for this frame; margins and unused cells stay white.
            grid_frame = np.full((output_h, output_w, C), 255, dtype=np.uint8)

            # Place each clip's frame in the grid, filling rows left to right.
            for grid_idx, clip in enumerate(clips):
                row, col = divmod(grid_idx, grid_size)
                y_start = row * (H + margin)
                x_start = col * (W + margin)
                grid_frame[y_start:y_start + H, x_start:x_start + W] = clip[frame_idx]

            # Convert to av.VideoFrame and encode
            frame = av.VideoFrame.from_ndarray(grid_frame, format='rgb24')
            for packet in stream.encode(frame):
                container.mux(packet)

        # Flush encoder
        for packet in stream.encode():
            container.mux(packet)

    print(f"Grid video saved to {output_path} ({output_w}x{output_h}, {T} frames, {len(clips)} clips)")

In [None]:
from train import *
import random
from sklearn.model_selection import train_test_split
import re
from collections import Counter

def split_data(data_dir: str = "preprocessed_clips_3"):
    """
    Split the clips under ``data_dir`` into train/val/test path lists (80/10/10).

    ``data_dir`` is scanned for one sub-directory per label, and every ``.npy``
    file inside a label directory is one clip. Each clip gets a "source" taken
    from its filename, and the splits are stratified on the combined
    ``label_source`` key when every stratum has at least two members;
    otherwise that split falls back to a plain random split.

    Args:
        data_dir: root directory holding the per-label clip directories.

    Returns:
        Tuple ``(train_paths, val_paths, test_paths)`` of lists of path
        strings of the form ``"<data_dir>/<label>/<filename>"``.

    Raises:
        ValueError: if no ``.npy`` clips are found under ``data_dir``.
    """
    # Collect all paths with their labels and sources
    all_paths = []
    all_labels = []
    all_sources = []

    # os.listdir returns entries in arbitrary order and train_test_split is
    # order-sensitive, so sort to make the seeded split reproducible.
    for label in sorted(os.listdir(data_dir)):
        label_dir = f"{data_dir}/{label}"
        if not os.path.isdir(label_dir):
            # Skip stray files (e.g. .DS_Store) sitting next to the label dirs.
            continue

        for filename in sorted(os.listdir(label_dir)):
            if not filename.endswith(".npy"):
                continue

            # Source = "task_kam<digits>_" plus the following run of
            # non-underscore characters, wherever it occurs in the filename.
            source_match = re.search(r'task_kam\d+_[^_]+', filename)
            if source_match:
                source = source_match.group(0)
            else:
                # Fallback: use filename without extension as source
                source = os.path.splitext(filename)[0]

            all_paths.append(f"{label_dir}/{filename}")
            all_labels.append(label)
            all_sources.append(source)

    if not all_paths:
        raise ValueError(f"No .npy clips found under {data_dir!r}")

    # Create combined stratification key: label_source
    # This ensures both label and source distributions are maintained
    stratify_key = [f"{label}_{source}" for label, source in zip(all_labels, all_sources)]

    # Stratified splitting needs at least two members per stratum. The
    # filename fallback above can yield a stratum with only one member, so
    # guard the first split the same way the second split is guarded.
    first_stratify = stratify_key
    if min(Counter(stratify_key).values()) < 2:
        print("Warning: some label/source groups have a single clip, "
              "using a non-stratified train split")
        first_stratify = None

    # First split: 80% train, 20% temp (which will become val+test)
    train_paths, temp_paths, _, temp_labels, _, temp_sources = train_test_split(
        all_paths, all_labels, all_sources,
        test_size=0.2,
        stratify=first_stratify,
        random_state=632
    )

    # Second split: split temp into 50% val, 50% test (which gives 10% val, 10% test overall)
    # Create new stratification key for temp split
    temp_stratify_key = [f"{label}_{source}" for label, source in zip(temp_labels, temp_sources)]
    min_count = min(Counter(temp_stratify_key).values())

    # The seeds differ between the two branches; they are kept as they were
    # so existing splits do not move.
    if min_count >= 2:
        val_paths, test_paths, _, _, _, _ = train_test_split(
            temp_paths, temp_labels, temp_sources,
            test_size=0.5,
            stratify=temp_stratify_key,
            random_state=42
        )
    else:
        # Fall back to non-stratified split if some classes have < 2 members
        val_paths, test_paths, _, _, _, _ = train_test_split(
            temp_paths, temp_labels, temp_sources,
            test_size=0.5,
            stratify=None,
            random_state=632
        )

    return train_paths, val_paths, test_paths

class BoxingDataset(Dataset):
    """
    Dataset over the clip files returned by ``split_data()``.

    The train/val/test path lists are computed once, when the class body is
    executed, and stored as class attributes shared by all instances. Each
    instance serves one of those lists, selected by ``split``.

    Each item is a dict with:
        "pixel_values": float32 tensor, the clip scaled to 0-1, normalized
            with ``mean``/``std`` and reordered to (T, C, H, W).
        "labels": long tensor holding ``LABEL2ID`` of the clip's parent
            directory name.
    """
    # ImageNet per-channel mean/std, applied after scaling pixels to 0-1.
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std  = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    
    # Evaluated at class-definition time: defining the class scans the data
    # directory and performs the split.
    all_splits = split_data()
    train_paths = all_splits[0]
    val_paths = all_splits[1]
    test_paths = all_splits[2]

    # Names accepted for ``split``; each maps to the "<name>_paths" attribute.
    _SPLITS = ("train", "val", "test")

    def __init__(self, split: str):
        """
        Args:
            split: one of "train", "val" or "test".

        Raises:
            ValueError: if ``split`` is not a known split name.
        """
        # Validate here so a typo fails at construction time instead of on
        # the first len()/indexing call.
        if split not in self._SPLITS:
            raise ValueError(f"Unknown split: {split}")
        self.split = split

    def _paths(self):
        """Return the path list for the current ``self.split``."""
        # Re-checked on every call in case ``split`` was reassigned after init.
        if self.split not in self._SPLITS:
            raise ValueError(f"Unknown split: {self.split}")
        return getattr(self, f"{self.split}_paths")

    def __len__(self):
        """Number of clips in the selected split."""
        return len(self._paths())

    def __getitem__(self, idx):
        """Load, normalize and return the clip at ``idx`` with its label id."""
        path = self._paths()[idx]
        
        clip = np.load(path)
        
        # convert to float and scale to 0-1
        clip = clip.astype(np.float32) / 255.0
        
        # ImageNet mean/std; broadcasts over the last (channel) axis
        clip = (clip - self.mean) / self.std
        
        # reorder (T,H,W,C) to (T,C,H,W)
        clip = clip.transpose(0,3,1,2)
        
        # convert to tensor
        clip = torch.from_numpy(clip)
        
        return {
            "pixel_values": clip,
            # The label is the name of the directory containing the clip.
            "labels": torch.tensor(LABEL2ID[path.split("/")[-2]], dtype=torch.long) 
        }




from collections import defaultdict
# Tally how many training clips fall under each label; a clip's label is the
# name of the directory that directly contains the file.
paths = BoxingDataset.train_paths
label_counts = defaultdict(int)
for path in paths:
    label = path.rsplit("/", 2)[-2]
    label_counts[label] = label_counts[label] + 1

print(dict(label_counts))



  from .autonotebook import tqdm as notebook_tqdm


{'LHHP': 7005, 'RHMP': 2004, 'RHBP': 727, 'RHHP': 3486, 'LHBlP': 1546, 'LHMP': 3373, 'RHBlP': 738, 'LHBP': 761}


In [3]:
# Render one grid video per label directory found under preprocessed_clips_3.
for label in os.listdir("preprocessed_clips_3"):
    paths = []
    for p in os.listdir(f"preprocessed_clips_3/{label}"):
        paths.append(f"preprocessed_clips_3/{label}/{p}")
    create_grid_video_from_paths(paths, f'grid_vids_4/{label}_grid.mp4')


Grid video saved to grid_vids_4/RHMP_grid.mp4 (1160x1160, 16 frames, 25 clips)
Grid video saved to grid_vids_4/RHBP_grid.mp4 (1160x1160, 16 frames, 25 clips)
Grid video saved to grid_vids_4/LHBlP_grid.mp4 (1160x1160, 16 frames, 25 clips)
Grid video saved to grid_vids_4/RHBlP_grid.mp4 (1160x1160, 16 frames, 25 clips)
Grid video saved to grid_vids_4/LHBP_grid.mp4 (1160x1160, 16 frames, 25 clips)
Grid video saved to grid_vids_4/LHMP_grid.mp4 (1160x1160, 16 frames, 25 clips)
Grid video saved to grid_vids_4/RHHP_grid.mp4 (1160x1160, 16 frames, 25 clips)
Grid video saved to grid_vids_4/LHHP_grid.mp4 (1160x1160, 16 frames, 25 clips)


In [4]:
train_paths, val_paths, test_paths = (
    BoxingDataset.train_paths,
    BoxingDataset.val_paths,
    BoxingDataset.test_paths,
)

# Render one grid video per dataset split, named after the split.
split_names = ('train', 'val', 'test')
for split, paths_list in zip(split_names, (train_paths, val_paths, test_paths)):
    paths = paths_list
    create_grid_video_from_paths(paths, f'grid_vids_4/{split}_grid.mp4')

Grid video saved to grid_vids_4/train_grid.mp4 (1160x1160, 16 frames, 25 clips)
Grid video saved to grid_vids_4/val_grid.mp4 (1160x1160, 16 frames, 25 clips)
Grid video saved to grid_vids_4/test_grid.mp4 (1160x1160, 16 frames, 25 clips)
