## Imports

In [3]:
# PyTorch imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, WeightedRandomSampler, Dataset
from torchvision import transforms
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
# Report the selected device so the choice is visible in the cell output.
print("Torch device:", device)


from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# Personal imports
import dataset.download_and_preprocess as dl

Torch device: cuda


## Download and pre-process the dataset

In [4]:
# Action classes to process, one download + frame-extraction pass each.
action_space = [
    'walking',
    'running',
    'jogging',
    'boxing',
    'handwaving',
    'handclapping',
]

for action in action_space:
    # Fetch the data for this action, then extract and save its frames.
    # NOTE(review): neither call's return value is captured here; presumably
    # both helpers persist their results themselves -- confirm in
    # dataset/download_and_preprocess.
    dl.download_and_extract(action)
    dl.extract_and_save_frames(action)


## Split the data into train, validation, and test sets

In [8]:
class ActionDataset(Dataset):
    """Map-style dataset pairing each stored frame with its class label.

    Args:
        images: Indexable collection of frames; ``images[idx]`` is one frame.
        labels: Indexable collection of integer class ids aligned with
            ``images``.
        transform: Optional callable applied to the *raw* stored frame
            (before any tensor conversion). Its result is then coerced to a
            ``float32`` tensor.
    """

    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of stored frames."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``.

        Returns:
            A ``(float32 tensor, int64 tensor)`` pair.
        """
        image = self.images[idx]
        label = torch.as_tensor(self.labels[idx], dtype=torch.long)

        # The transform must see the raw frame: torchvision's ToTensor only
        # accepts a PIL Image or ndarray and raises TypeError when handed a
        # torch.Tensor, so converting first would break such pipelines.
        if self.transform is not None:
            image = self.transform(image)

        # Coerce whatever came out (raw frame or transformed tensor) to float32.
        if isinstance(image, torch.Tensor):
            image = image.to(dtype=torch.float32)
        else:
            image = torch.tensor(image, dtype=torch.float32)

        return image, label

In [9]:
# Materialise frames and labels as arrays so they can be split and indexed.
# NOTE(review): `all_frames` and `all_actions` are not defined in the cells
# shown here -- they must already exist in the kernel; confirm which cell
# produces them so the notebook survives Restart & Run All.
images = np.array(all_frames)
labels = np.array(all_actions)

# Stage 1: hold out 30 % of the samples, stratified by label (70 % train).
X_train, X_temp, y_train, y_temp = train_test_split(
    images, labels, test_size=0.3, stratify=labels, random_state=42,
)

# Stage 2: halve the held-out part into validation and test (15 % each),
# again stratified so every split keeps the label proportions.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42,
)


# Training pipeline: tensor conversion followed by random flip augmentation.
# ToTensor accepts a PIL Image or numpy ndarray (not a torch.Tensor).
# NOTE(review): vertical flips turn the subject upside-down -- confirm this
# augmentation is actually wanted for these action classes.
train_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.RandomHorizontalFlip(p=0.5),  # flip left/right half the time
        transforms.RandomVerticalFlip(p=0.5),  # flip top/bottom half the time
        # Add any normalization here if needed
    ]
)

# Evaluation pipeline: tensor conversion only, no random augmentation.
val_test_transform = transforms.Compose([transforms.ToTensor()])

# One dataset per split; only the training split gets the augmenting pipeline.
train_dataset, val_dataset, test_dataset = (
    ActionDataset(frames, targets, transform=pipeline)
    for frames, targets, pipeline in (
        (X_train, y_train, train_transform),
        (X_val, y_val, val_test_transform),
        (X_test, y_test, val_test_transform),
    )
)

# Tunables for this cell, named once instead of repeated inline.
BATCH_SIZE = 32
SAMPLER_SEED = 42  # same seed value as the train/val/test splits

# Inverse-frequency class weights computed from the training labels.
# np.bincount requires non-negative integer labels.
class_sample_count = np.bincount(y_train)
# Class ids that never occur in y_train get weight 0 rather than triggering a
# divide-by-zero (inf + RuntimeWarning); such ids are never indexed below.
class_weights = np.divide(
    1.0,
    class_sample_count,
    out=np.zeros(class_sample_count.shape, dtype=np.float64),
    where=class_sample_count > 0,
)
# Per-sample weight = weight of that sample's class.
sample_weights = class_weights[y_train]

# Draw with replacement so rarer classes are seen as often as common ones;
# the seeded generator makes the sampling order reproducible across runs.
sampler = WeightedRandomSampler(
    sample_weights,
    num_samples=len(sample_weights),
    replacement=True,
    generator=torch.Generator().manual_seed(SAMPLER_SEED),
)

# Dataloaders: the sampler drives the training order (mutually exclusive with
# shuffle=True); validation and test keep their stored order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

## TODO
1. The dataset is too large; it needs to be broken up into smaller chunks.
2. Train the autoencoder.
