In [5]:
import torch
import os
import cv2
import torchvision.transforms as transforms
import numpy as np

In [6]:
class FitnessData(torch.utils.data.Dataset):
    def __init__(self, root_dir, transform=None, frames_per_clip=16):
        self.root_dir = root_dir
        if transform is None:
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # mean and std adopted from ImageNet

            ])
        else:
            self.transform = transform
        self.frames_per_clip = frames_per_clip
        self.classes = [cls for cls in os.listdir(root_dir) if not cls.startswith(".")]
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}
        self.idx_to_class = {i: cls for cls, i in self.class_to_idx.items()}
        self.video_files = []
        for cls in self.classes:
            cls_path = os.path.join(root_dir, cls)
            if os.path.isdir(cls_path):
                for file in os.listdir(cls_path):
                    if file.endswith((".mp4", ".avi", ".mov")):
                        self.video_files.append((os.path.join(cls_path, file), self.class_to_idx[cls]))

    def __len__(self):
        return len(self.video_files)
    
    def read_video_frames(self, video_path):
        video = cv2.VideoCapture(video_path, cv2.CAP_FFMPEG)
        frames = []
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames == 0:
            video.release()
            return None
        frame_indices = np.linspace(0, total_frames - 1, self.frames_per_clip).astype(int)
        for idx in frame_indices:
            video.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ret, frame = video.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)

        video.release()
        return frames if len(frames) == self.frames_per_clip else None
    
    def __getitem__(self, idx):
        attempts = 3 
        for _ in range(attempts):
            video_path, label = self.video_files[idx]
            if not os.path.exists(video_path):
                print(f"Warning: Video file not found → {video_path}")
                idx = np.random.randint(0, len(self))
                continue  # Try another video
            frames = self.read_video_frames(video_path)

            if frames is not None:  # Successfully read
                if self.transform is not None:
                    frames = [self.transform(frame) for frame in frames]
                return torch.stack(frames), label
            
            idx = np.random.randint(0, len(self))
        
        raise RuntimeError(f"Failed to load video after {attempts} attempts: {video_path}")



In [7]:
dataset = FitnessData(root_dir="workoutfitness-video")
print(f"Total videos in dataset: {len(dataset)}")

Total videos in dataset: 592


In [11]:
print("Number of classes:", len(dataset.classes))

Number of classes: 22


In [10]:
print("Class-to-Index Mapping:", dataset.class_to_idx)
print("Index-to-Class Mapping:", dataset.idx_to_class)

Class-to-Index Mapping: {'deadlift': 0, 'hammer curl': 1, 'tricep Pushdown': 2, 'squat': 3, 'push-up': 4, 'tricep dips': 5, 'lat pulldown': 6, 'barbell biceps curl': 7, 'chest fly machine': 8, 'incline bench press': 9, 'leg extension': 10, 'shoulder press': 11, 't bar row': 12, 'decline bench press': 13, 'bench press': 14, 'lateral raise': 15, 'pull Up': 16, 'plank': 17, 'leg raises': 18, 'hip thrust': 19, 'romanian deadlift': 20, 'russian twist': 21}
Index-to-Class Mapping: {0: 'deadlift', 1: 'hammer curl', 2: 'tricep Pushdown', 3: 'squat', 4: 'push-up', 5: 'tricep dips', 6: 'lat pulldown', 7: 'barbell biceps curl', 8: 'chest fly machine', 9: 'incline bench press', 10: 'leg extension', 11: 'shoulder press', 12: 't bar row', 13: 'decline bench press', 14: 'bench press', 15: 'lateral raise', 16: 'pull Up', 17: 'plank', 18: 'leg raises', 19: 'hip thrust', 20: 'romanian deadlift', 21: 'russian twist'}


In [9]:
video_sample, label = dataset[0]
print(f"Video shape: {video_sample.shape}, Label: {label}")
print(video_sample)
print("min:", video_sample.min(), "max:", video_sample.max())

Video shape: torch.Size([16, 3, 224, 224]), Label: 0
tensor([[[[-1.4500, -1.4329, -1.4329,  ..., -1.3130, -1.3302, -1.3302],
          [-1.4500, -1.4329, -1.4329,  ..., -1.3130, -1.3302, -1.3302],
          [-1.4500, -1.4329, -1.4329,  ..., -1.3130, -1.3302, -1.3302],
          ...,
          [-1.7583, -1.7583, -1.7240,  ..., -1.6898, -1.7240, -1.7240],
          [-1.7412, -1.7412, -1.7412,  ..., -1.7069, -1.7412, -1.7412],
          [-1.7412, -1.7412, -1.7412,  ..., -1.7069, -1.7412, -1.7412]],

         [[-1.2129, -1.1954, -1.1954,  ..., -1.0553, -1.0728, -1.0728],
          [-1.2129, -1.1954, -1.1779,  ..., -1.0553, -1.0728, -1.0728],
          [-1.2129, -1.1954, -1.1604,  ..., -1.0553, -1.0728, -1.0728],
          ...,
          [-1.5805, -1.5805, -1.5805,  ..., -1.5280, -1.5630, -1.5630],
          [-1.5980, -1.5980, -1.5630,  ..., -1.5105, -1.5455, -1.5455],
          [-1.5980, -1.5980, -1.5630,  ..., -1.5105, -1.5455, -1.5455]],

         [[-0.8284, -0.8110, -0.8110,  ..., -0.82