In [1]:
# Video-decoding dependencies for this notebook: OpenCV (imported below as
# cv2) and imageio together with its ffmpeg extra.
# NOTE(review): versions are unpinned - consider pinning for reproducible runs.
%pip install opencv-python
%pip install imageio[ffmpeg]

Note: you may need to restart the kernel to use updated packages.



In [2]:

# --- Input locations ---------------------------------------------------
# Directory holding the downloaded "<sequence_id>.mp4" clips.
video_dir = 'short'
# Text file listing one downloaded sequence id per line.
sequence_path = 'short/download_log.txt'
# Annotation table; the dataset class reads its sequence_id / l3_pose columns.
csv_path = 'data/3DYoga90.csv'

# --- Target classes ----------------------------------------------------
# Position in this list is the class index used for the one-hot label.
pose_list = [
    'mountain',
    'half-way-lift',
    'standing-forward-bend',
    'downward-dog',
]
NUM_CLASSES = len(pose_list)

In [3]:
# Constants
# Frames are resized to a square of this size (VGG16 input size).
FRAME_HEIGHT = FRAME_WIDTH = 224
# Number of frames sampled, evenly spaced, from every video.
SEQUENCE_LENGTH = 16
# Not referenced in the visible cells - presumably the loader batch size; TODO confirm.
BATCH_SIZE = 8

In [6]:
import pandas as pd
import os
import torch
import numpy as np
import cv2 as cv
from torchvision.transforms import transforms
from PIL import Image
import imageio

# NOTE(review): this module-level counter is never read or updated in the
# visible code; Dataset keeps its own `self.length_of_dataset`. Confirm that
# no other cell relies on it before removing.
length_of_dataset = 0

class Dataset:
    """Index-able collection of labelled pose videos.

    Rows of the annotation CSV are kept only when their ``sequence_id`` is
    listed in the download log and their ``l3_pose`` is one of ``pose_list``.
    Each video is expected at ``<video_dir>/<sequence_id>.mp4``.

    Note: this is a plain class, not a subclass of ``torch.utils.data.Dataset``.

    Args:
        csv_path: Path to the annotation CSV (needs ``sequence_id`` and
            ``l3_pose`` columns).
        sequence_path: Text file with one integer sequence id per line.
        pose_list: Class names; the position in this list is the class index.
        video_dir: Directory containing the ``.mp4`` files.
        verbose: When True (default, matching the previous behaviour) debug
            information is printed while items are loaded. Pass False to
            silence it, e.g. when iterating over the whole dataset.

    Raises:
        ValueError: If a non-blank line of the download log is not an integer.
    """

    def __init__(self, csv_path, sequence_path, pose_list, video_dir, verbose=True):
        with open(sequence_path) as f:
            # Blank lines (e.g. a stray empty line in the log) are skipped
            # instead of crashing int().
            sequence_list = [int(line) for line in f if line.strip()]

        df = pd.read_csv(csv_path)
        # Keep only downloaded sequences that belong to the required classes.
        keep = df['sequence_id'].isin(sequence_list) & df['l3_pose'].isin(pose_list)
        self.df = df[keep]

        self.pose_to_idx = {pose: idx for idx, pose in enumerate(pose_list)}
        # Label width follows the pose_list given to this instance rather than
        # a module-level constant, so the two can never disagree.
        self.num_classes = len(pose_list)

        self.length_of_dataset = len(self.df)
        self.video_dir = video_dir
        self.verbose = verbose

        # Resize to the configured frame size, convert to a float tensor and
        # normalise each channel with the mean/std values below.
        self.transforms = transforms.Compose([
            transforms.Resize((FRAME_HEIGHT, FRAME_WIDTH)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        """Return the number of rows that survived filtering."""
        return self.length_of_dataset

    def print(self):
        """Print the row count, the pose-to-index mapping and ``len(self)``."""
        print(len(self.df))
        print(self.pose_to_idx)
        print(len(self))

    def __getitem__(self, i):
        """Load item ``i`` (positional index into the filtered table).

        Returns:
            Tuple ``(video_path, pose, label)`` where ``label`` is a one-hot
            float tensor with one entry per class in ``pose_list``.

        NOTE(review): the frames are decoded (and their shape printed when
        verbose) but are not part of the returned tuple. The return value is
        left unchanged so existing callers keep working - confirm whether the
        frames should be returned as well.
        """
        row = self.df.iloc[i]
        sequence_id = row['sequence_id']
        if self.verbose:
            print(sequence_id)
        video_path = os.path.join(self.video_dir, f"{sequence_id}.mp4")
        pose = row['l3_pose']

        label = torch.zeros(self.num_classes)
        label[self.pose_to_idx[pose]] = 1

        frames = self.read_video_with_imageio(video_path)
        if self.verbose:
            print(frames.shape)

        return video_path, pose, label

    @staticmethod
    def _sample_indices(total_frames, num_samples):
        """Return ``num_samples`` evenly spaced integer frame indices.

        Indices repeat when ``total_frames < num_samples``.

        Raises:
            RuntimeError: If ``total_frames`` is not positive.
        """
        if total_frames <= 0:
            raise RuntimeError(f"Video reports no frames (frame count = {total_frames})")
        return np.linspace(0, total_frames - 1, num_samples, dtype=int)

    @staticmethod
    def _gather_frames(decoded, indices, video_path):
        """Stack one frame per entry of ``indices`` into a single tensor.

        Args:
            decoded: Mapping ``frame index -> frame tensor`` of the frames that
                were actually decoded.
            indices: Wanted frame indices in ascending order (may repeat).
            video_path: Only used in the error message.

        Repeated indices reuse the same frame, and an index that was never
        decoded (the container reported more frames than could be read) falls
        back to the previous frame, so the result always has ``len(indices)``
        frames.

        Raises:
            RuntimeError: If no frame was decoded at all.
        """
        if not decoded:
            raise RuntimeError(f"No frames could be decoded from {video_path}")
        previous = decoded[min(decoded)]
        clip = []
        for idx in indices:
            previous = decoded.get(int(idx), previous)
            clip.append(previous)
        return torch.stack(clip)

    def read_video_with_imageio(self, video_path):
        """Decode ``SEQUENCE_LENGTH`` evenly spaced frames with imageio/ffmpeg.

        Returns:
            Tensor of the transformed frames stacked along a new first
            dimension; it always holds ``SEQUENCE_LENGTH`` frames.

        Raises:
            RuntimeError: If the video has no decodable frames.
        """
        reader = imageio.get_reader(video_path, 'ffmpeg')
        try:
            total_frames = reader.count_frames()
            if self.verbose:
                print(total_frames)
            indices = self._sample_indices(total_frames, SEQUENCE_LENGTH)
            wanted = set(indices.tolist())  # O(1) membership test per frame

            decoded = {}
            for i, frame in enumerate(reader):
                if i in wanted:
                    decoded[i] = self.transforms(Image.fromarray(frame))
        finally:
            # Release the reader even if decoding or a transform raises.
            reader.close()

        return self._gather_frames(decoded, indices, video_path)

    def _get_frames(self, video_path):
        """Load video and sample SEQUENCE_LENGTH frames (OpenCV variant).

        Returns:
            Tensor of the transformed frames stacked along a new first
            dimension; it always holds ``SEQUENCE_LENGTH`` frames.

        Raises:
            RuntimeError: If the video has no decodable frames.
        """
        cap = cv.VideoCapture(video_path)
        try:
            total_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
            if self.verbose:
                print('Total Frames:', total_frames)
            # Calculate sampling indices
            indices = self._sample_indices(total_frames, SEQUENCE_LENGTH)
            wanted = set(indices.tolist())

            decoded = {}
            for frame_idx in range(total_frames):
                ret, frame = cap.read()
                if not ret:
                    break
                if frame_idx in wanted:
                    # OpenCV decodes to BGR; convert to RGB before building the PIL image.
                    rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
                    decoded[frame_idx] = self.transforms(Image.fromarray(rgb))
        finally:
            # Release the capture even if decoding or a transform raises.
            cap.release()

        # Stack frames into tensor
        frames = self._gather_frames(decoded, indices, video_path)
        if self.verbose:
            print(frames.shape)
        return frames
               
        

# Smoke test: build the dataset from the paths configured in the earlier
# cells and fetch the first sample.
a = Dataset(csv_path, sequence_path, pose_list, video_dir)
# a.print()  # uncomment to print the row count and the pose-to-index mapping
a[0]

1000
97
torch.Size([16, 3, 224, 224])


('short\\1000.mp4', 'mountain', tensor([1., 0., 0., 0.]))