## Imports

In [1]:
# PyTorch imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, WeightedRandomSampler, Dataset
from torchvision import transforms
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Torch device:", device) # Quick check to see if we're using GPU or CPU.


from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from pathlib import Path

# Personal imports
import dataset.download_and_preprocess as dl
from dataset.dataloader import KTHDataset

Torch device: cuda


  def __init__(self, root_dir: Path = Path("dataset\KTH_data"), transform=None):


## Download the dataset and pre-process the dataset

In [None]:
action_space = ['walking', 'running', 'jogging', 'boxing', 'handwaving', 'handclapping']
for action in action_space:
    dl.download_and_extract(action)
    dl.extract_and_save_frames(action)


## Split the data into train, test, validate

In [None]:
image_file_names = []
labels = []

# Create a list of all the image file names and their corresponding action labels
for action in action_space:
    folder = Path("dataset") / "KTH_data" / action
    for f in folder.glob("*.pt"):
        image_file_names.append(f)
        labels.append(action)
        
paths = np.array(image_file_names)
labels = np.array(labels)

In [None]:
# Split the dataset into training, validation, and test sets
# 70% training, 15% validation, 15% test
X_train, X_temp, y_train, y_temp = train_test_split(
    paths, labels, 
    stratify=labels, 
    test_size=0.3, 
    random_state=42
)
# Split the temporary set into validation and test sets
# 50% of the temporary set for validation and 50% for testing (again, 15% each overall)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, 
    stratify=y_temp, 
    test_size=0.5, 
    random_state=42
)


# Data augmentation for training
train_transform = transforms.Compose([
    transforms.Lambda(lambda x: torch.from_numpy(x) if isinstance(x, np.ndarray) else x),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5)
])

val_test_transform = transforms.Compose([
    transforms.Lambda(lambda x: torch.from_numpy(x) if isinstance(x, np.ndarray) else x)
])


train_dataset = KTHDataset(X_train, y_train, transform=train_transform)
val_dataset = KTHDataset(X_val, y_val, transform=val_test_transform)
test_dataset = KTHDataset(X_test, y_test, transform=val_test_transform)

# Create sample weights based on training label frequency

# Map class names to integer indices
class_to_idx = {cls_name: idx for idx, cls_name in enumerate(action_space)}
y_train_indices = np.array([class_to_idx[label] for label in y_train])

class_sample_count = np.bincount(y_train_indices)
class_weights = 1. / class_sample_count
sample_weights = class_weights[y_train_indices]
sampler = WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)

In [None]:
# Dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, sampler=sampler)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# test training for loop:
for i, (images, labels) in enumerate(train_loader):
    print(f"Batch {i+1}:")
    print("Images shape:", images.shape)
    print(images)
    break  # Just to test the first batch

Batch 1:
Images shape: torch.Size([32, 120, 160])
tensor([[[0.4824, 0.4510, 0.4745,  ..., 0.4902, 0.4784, 0.5059],
         [0.4745, 0.5059, 0.4941,  ..., 0.4549, 0.5098, 0.4941],
         [0.4824, 0.4667, 0.4824,  ..., 0.5294, 0.5294, 0.5216],
         ...,
         [0.6275, 0.5882, 0.6314,  ..., 0.6275, 0.5843, 0.6118],
         [0.6196, 0.6314, 0.6118,  ..., 0.6392, 0.6196, 0.6039],
         [0.6745, 0.7294, 0.7098,  ..., 0.7882, 0.7686, 0.7647]],

        [[0.3451, 0.3961, 0.3686,  ..., 0.3725, 0.3804, 0.4353],
         [0.3765, 0.4235, 0.3569,  ..., 0.3804, 0.3922, 0.3922],
         [0.3725, 0.3725, 0.3725,  ..., 0.3725, 0.4000, 0.3804],
         ...,
         [1.0000, 0.7961, 0.9451,  ..., 0.6196, 0.6392, 0.6510],
         [0.9882, 0.8549, 0.9882,  ..., 0.9059, 0.9647, 0.9373],
         [0.9882, 0.9608, 0.9882,  ..., 1.0000, 1.0000, 0.9882]],

        [[0.4275, 0.4353, 0.4078,  ..., 0.3961, 0.4314, 0.4000],
         [0.4392, 0.4510, 0.3529,  ..., 0.3725, 0.3843, 0.4235],
        

## TODO
1. Dataset is too large, need to break it up in bits.
2. AutoEncoder training
