In [1]:
import os
import datasets
import ffmpeg

  from .autonotebook import tqdm as notebook_tqdm


# Zone Identified Deletions

In [27]:
def load_video(filepath):
    """Decode a video file into a raw byte string using ffmpeg.

    The input is run through ffmpeg with the output options
    ``format='rawvideo'``, ``pix_fmt='rgb24'`` and ``s='224x224'``, writing
    to ``pipe:`` so the result can be captured from stdout.

    Args:
        filepath: Path of the video file to decode.

    Returns:
        The stdout captured from the ffmpeg run (the first element of the
        pair returned by ``run``).

    Raises:
        FileNotFoundError: If ``filepath`` does not exist on disk.
    """
    path_is_present = os.path.exists(filepath)
    if not path_is_present:
        raise FileNotFoundError(f"File not found: {filepath}")

    source = ffmpeg.input(filepath)
    raw_sink = source.output('pipe:', format='rawvideo', pix_fmt='rgb24', s='224x224')

    # stderr is captured together with stdout, but only stdout is returned.
    frame_bytes, _stderr = raw_sink.run(capture_stdout=True, capture_stderr=True)
    return frame_bytes

def get_class_label_mapping(data_dir):
    """Build a reproducible ``{class_name: label_index}`` mapping.

    Each immediate sub-directory of ``data_dir`` is treated as one class.
    Names are sorted before indices are assigned because ``os.listdir``
    returns entries in arbitrary order; without sorting, the same class
    could receive a different label on another run or machine.

    Regular files sitting directly in ``data_dir`` are ignored, since they
    are not class folders and cannot be listed for videos later.

    Args:
        data_dir: Root directory that contains one sub-directory per class.

    Returns:
        A dict mapping each class directory name to a consecutive integer
        label starting at 0, in sorted name order.
    """
    class_names = sorted(
        entry
        for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )
    return {name: idx for idx, name in enumerate(class_names)}

def create_hf_dataset(data_dir):
    """Load every video under ``data_dir`` into a Hugging Face ``Dataset``.

    ``data_dir`` is expected to hold one sub-directory per class (as
    resolved by ``get_class_label_mapping``). Every regular file inside a
    class directory is decoded with ``load_video`` and stored alongside the
    integer label of its class.

    Args:
        data_dir: Root directory containing the per-class sub-directories.

    Returns:
        A ``datasets.Dataset`` with two columns: ``'video'`` (the value
        returned by ``load_video`` for each file) and ``'label'`` (the class
        index).

    Raises:
        Whatever ``load_video`` raises for a file that cannot be decoded.
    """
    class_mapping = get_class_label_mapping(data_dir)
    videos = []
    labels = []

    for class_name, class_idx in class_mapping.items():
        class_dir = os.path.join(data_dir, class_name)
        # os.listdir yields entries in arbitrary order; sort so that the
        # row order of the resulting dataset is reproducible.
        for video_file in sorted(os.listdir(class_dir)):
            video_path = os.path.join(class_dir, video_file)
            # Nested directories are not videos; skip them rather than
            # handing them to ffmpeg.
            if not os.path.isfile(video_path):
                continue
            videos.append(load_video(video_path))
            labels.append(class_idx)

    # Create a Hugging Face Dataset
    return datasets.Dataset.from_dict({
        'video': videos,
        'label': labels
    })

# Root folder of the videos, passed to create_hf_dataset. Set the
# WORKOUT_DATA_DIR environment variable to point at a different location;
# when it is unset the original hard-coded path is used.
DATA_DIR = os.environ.get(
    "WORKOUT_DATA_DIR",
    r"/home/aman/WorkoutVideoClassification/data",
)

dataset = create_hf_dataset(DATA_DIR)

: 