In [121]:
from torch.utils.data import Dataset
import pathlib
import os
from PIL import Image
import numpy as np

class HMDB51Dataset():
  def __init__(self, targ_dir: str, transform=None):
    self.total_examples = self.count_subfolders(targ_dir)
    self.classes, self.class_to_index = self.get_classes(targ_dir)
    self.paths = self.get_paths(targ_dir)

  def __len__(self):
    return self.total_examples

  def __getitem__(self, index: int):
    X = self.get_example(self.paths[index])
    y = self.get_label(self.paths[index])
    return X, y

  def count_subfolders(self, folder_path):
      count = 0
      for folder in os.listdir(folder_path):
          subfolder_path = os.path.join(folder_path, folder)
          if os.path.isdir(subfolder_path):
              for subfolder in os.listdir(subfolder_path):
                  if os.path.isdir(os.path.join(subfolder_path, subfolder)):
                      count += 1
      return count

  def get_classes(self, targ_dir: str):

      count = 0
      classes = []
      indexes = []

      for folder in os.listdir(targ_dir):
          subfolder_path = os.path.join(targ_dir, folder)
          classes.append(folder)
          indexes.append(count)
          count += 1
      return classes, indexes

  def get_paths(self, targ_dir: str):
    path_obj = pathlib.Path(targ_dir)
    paths = []

    for subfolder in path_obj.iterdir():
        if subfolder.is_dir():      
            for subsubfolder in subfolder.iterdir():
                if subsubfolder.is_dir():
                    paths.append(subsubfolder.resolve().as_posix())
    return paths
  
  def get_label(self, directory: str):
    return self.classes.index(pathlib.Path(directory).parts[-2])

  def get_example(self, directory: str):
    segments = []
    segment_prefix = "/segment-"
    FLOW_FRAME_NAME = "flow_frame.png"
    RGB_FRAME_NAME = "rgb_frame.png"
    POSE_FRAME_NAME = "pose_frame.png"
    SEGMENTS_NUMBER = self.get_subfolders_number(directory)
    
    for segment_index in range(SEGMENTS_NUMBER):
      segment_path = directory + segment_prefix + str(segment_index) + "/"
      rgb_frame_path = segment_path + RGB_FRAME_NAME
      flow_frame_path = segment_path + FLOW_FRAME_NAME
      pose_frame_path = segment_path + POSE_FRAME_NAME
      
      # Let's get the actual images
      rgb_frame = np.asarray(Image.open(rgb_frame_path))
      print(f"rgb frame dimensions: {rgb_frame.shape}")
      # flow_frame = np.asarray(Image.open(flow_frame_path))
      # print(f"flow frame dimensions: {flow_frame.shape}")
      # pose_frame = np.asarray(Image.open(pose_frame_path))
      # print(f"pose frame dimensions: {pose_frame.shape}")
      
      segments.append((rgb_frame))#prev flow_frame, pose_frame

    return segments
  def get_subfolders_number(self, directory):
    path_obj = pathlib.Path(directory)

    num_subfolders = 0
    for subfolder in path_obj.iterdir():
        if subfolder.is_dir():
            num_subfolders += 1
    return num_subfolders

In [122]:
dataset = HMDB51Dataset("/content/drive/MyDrive/thesis/data/HMDB-51-downsampled_copy")

In [123]:
from torch.utils.data import DataLoader 

dataloader = DataLoader(dataset=dataset, batch_size=2, num_workers=2, shuffle=True)

In [124]:
video_batch, label_batch = next(iter(dataloader))

rgb frame dimensions: (240, 320, 3)
rgb frame dimensions: (240, 320, 3)
rgb frame dimensions: (240, 320, 3)
rgb frame dimensions: (240, 432, 3)
rgb frame dimensions: (240, 432, 3)
rgb frame dimensions: (240, 432, 3)
rgb frame dimensions: (240, 416, 3)
rgb frame dimensions: (240, 416, 3)
rgb frame dimensions: (240, 416, 3)


RuntimeError: ignored

rgb frame dimensions: (240, 416, 3)


In [114]:
len(video_batch)

3