In [1]:
import os
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import torch
import pdb
import numpy as np

In [2]:
# Define the path to your processed dataset
data_path = './processed/'

# Define the subjects for each split
train_subjects = [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 3, 5, 6, 7, 10]
val_subjects = [24, 25, 1, 4]
test_subjects = [22, 2, 8, 9]

# Define the background variations
background_variations = ['d1', 'd2', 'd3', 'd4']

processed_folder = './processed/'

In [3]:
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    # Add more augmentations if needed
])

In [4]:
class SequencesExtractor:
    def __init__(self, path, num_frames_per_subsequence=20):
        # Define the number of frames per subsequence
        self.num_frames_per_subsequence = num_frames_per_subsequence
        # List all action folders in the processed folder
        self.action_folders = os.listdir(path) # folders correspond to classes/labels
        self.class_to_label = {class_name: idx for idx, class_name in enumerate(self.action_folders)}
    def create_sequences(self, subjects, background_variations):
        sequences = []
        target_arr = []
        # Iterate over each action folder
        for action_folder in self.action_folders:
            target = action_folder
            action_path = os.path.join(processed_folder, action_folder)

            # List all person folders in the action folder
            person_folders_actual = os.listdir(action_path)
            # Filter videos based on subjects and background variations
            person_folders_target = [f'person{subject:02d}_{action_folder}_{bg}' 
                           for subject in subjects 
                           for bg in background_variations]
            person_folders = set(person_folders_actual) & set(person_folders_target) # it can be that some background variations (or smth else) is missing. 
            # Iterate over each person folder
            for person_folder in person_folders:
                person_path = os.path.join(action_path, person_folder)
                # List all image files in the person folder
                image_files = os.listdir(person_path)

                # Split the image files into subsequences
                num_frames = len(image_files)
                num_subsequences = num_frames // self.num_frames_per_subsequence

                for i in range(num_subsequences):
                    start_index = i * self.num_frames_per_subsequence
                    end_index = start_index + self.num_frames_per_subsequence

                    # Load and process the frames in the subsequence
                    subsequence_frames = []
                    for j in range(start_index, end_index):
                        image_path = os.path.join(person_path, image_files[j])
                        frame = Image.open(image_path).convert('RGB')
                        # Apply any desired spatial augmentations to the frame
                        frame = transform(frame)
                        subsequence_frames.append(frame)

                    # Apply any desired temporal augmentations to the subsequence

                    # Process the subsequence (e.g., feed it to a model for action classification)
                    subsequence_frames = torch.stack(subsequence_frames, dim=0)
                    # Perform further processing on the subsequence

                    target_arr.append(self.class_to_label[target])
                    sequences.append(subsequence_frames)
        return np.array(sequences), np.array(target_arr)

In [5]:
sequencesExtractor = SequencesExtractor(path=processed_folder, num_frames_per_subsequence=20)
train_sequences, train_target_arr = sequencesExtractor.create_sequences(train_subjects, background_variations)
test_sequences, test_target_arr = sequencesExtractor.create_sequences(test_subjects, background_variations)
val_sequences, val_target_arr = sequencesExtractor.create_sequences(val_subjects, background_variations)

  return np.array(sequences), np.array(target_arr)
  return np.array(sequences), np.array(target_arr)


In [11]:
print(len(val_sequences))

2146


In [6]:
from torch.utils.data import Dataset, DataLoader

In [7]:
class KTHDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = sequences
        self.labels = labels

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        sequence = self.sequences[idx]
        label = self.labels[idx]

        return sequence, label

In [9]:
train_dataset = KTHDataset(train_sequences, train_target_arr)
test_dataset = KTHDataset(test_sequences, test_target_arr)
val_dataset = KTHDataset(val_sequences, val_target_arr)

In [9]:
train_dataset

<__main__.KTHDataset at 0x7f96b55d24c0>

In [10]:
batch_size = 32
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Now, you can iterate over the dataloader
for batch in val_dataloader:
    pdb.set_trace()
    sequences, labels = batch
    # Your model training code goes here

> [0;32m<ipython-input-12-f8b92bfe9f49>[0m(4)[0;36m<module>[0;34m()[0m
[0;32m      1 [0;31m[0;31m# Now, you can iterate over the dataloader[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      2 [0;31m[0;32mfor[0m [0mbatch[0m [0;32min[0m [0mval_dataloader[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      3 [0;31m    [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m----> 4 [0;31m    [0msequences[0m[0;34m,[0m [0mlabels[0m [0;34m=[0m [0mbatch[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      5 [0;31m    [0;31m# Your model training code goes here[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  batch


[tensor([[[[[0.6784, 0.6824, 0.6902,  ..., 0.7176, 0.7137, 0.6980],
           [0.6275, 0.6235, 0.6235,  ..., 0.6549, 0.6588, 0.6588],
           [0.6157, 0.6157, 0.6157,  ..., 0.6353, 0.6431, 0.6471],
           ...,
           [0.5216, 0.5333, 0.5137,  ..., 0.4431, 0.4275, 0.4314],
           [0.5098, 0.5098, 0.5059,  ..., 0.4039, 0.3961, 0.4000],
           [0.5255, 0.5137, 0.4824,  ..., 0.3765, 0.3647, 0.3804]],

          [[0.6784, 0.6824, 0.6902,  ..., 0.7176, 0.7137, 0.6980],
           [0.6275, 0.6235, 0.6235,  ..., 0.6549, 0.6588, 0.6588],
           [0.6157, 0.6157, 0.6157,  ..., 0.6353, 0.6431, 0.6471],
           ...,
           [0.5216, 0.5333, 0.5137,  ..., 0.4431, 0.4275, 0.4314],
           [0.5098, 0.5098, 0.5059,  ..., 0.4039, 0.3961, 0.4000],
           [0.5255, 0.5137, 0.4824,  ..., 0.3765, 0.3647, 0.3804]],

          [[0.6784, 0.6824, 0.6902,  ..., 0.7176, 0.7137, 0.6980],
           [0.6275, 0.6235, 0.6235,  ..., 0.6549, 0.6588, 0.6588],
           [0.6157, 0.615

ipdb>  batch.shape


*** AttributeError: 'list' object has no attribute 'shape'


ipdb>  n


> [0;32m<ipython-input-12-f8b92bfe9f49>[0m(2)[0;36m<module>[0;34m()[0m
[0;32m      1 [0;31m[0;31m# Now, you can iterate over the dataloader[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m----> 2 [0;31m[0;32mfor[0m [0mbatch[0m [0;32min[0m [0mval_dataloader[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      3 [0;31m    [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      4 [0;31m    [0msequences[0m[0;34m,[0m [0mlabels[0m [0;34m=[0m [0mbatch[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      5 [0;31m    [0;31m# Your model training code goes here[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  sequences.shape


torch.Size([32, 20, 3, 64, 64])


ipdb>  labels.shape


torch.Size([32])
