## Imports

In [1]:
# PyTorch imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, WeightedRandomSampler, Dataset
from torchvision import transforms
# Select the compute device once for the whole notebook: CUDA when PyTorch
# reports an available GPU, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Torch device:", device)  # Sanity check of the selected device.


from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from pathlib import Path

# Personal imports
import dataset.download_and_preprocess as dl
from dataset.dataloader import KTHDataset
from autoencoder.autoencoder import AutoencoderModel


Torch device: cuda


## Download and pre-process the dataset

In [2]:
# Action classes handled by this notebook; the same list is reused later to
# locate the per-action folders and to build the class-index mapping.
action_space = [
    'walking',
    'running',
    'jogging',
    'boxing',
    'handwaving',
    'handclapping',
]

for action in action_space:
    # The value returned by download_and_extract is forwarded as `extraction`.
    # NOTE(review): presumably a flag saying whether frames still need to be
    # extracted -- confirm in dataset/download_and_preprocess.py.
    dl.extract_and_save_frames(
        action,
        extraction=dl.download_and_extract(action, overwrite=False),
    )

## Split the data into training, validation, and test sets

In [3]:
# Collect every saved `.pt` file together with the action it belongs to.
# `paths[i]` and `labels[i]` always describe the same sample.
image_file_names = []
labels = []

for action in action_space:
    folder = Path("dataset") / "KTH_data" / action
    # Directory listings come back in filesystem-dependent order. Sorting makes
    # the sample order stable, so the seeded train/val/test split performed
    # later in the notebook picks the same files on every machine and run.
    for f in sorted(folder.glob("*.pt")):
        image_file_names.append(f)
        labels.append(action)

# Convert to arrays so the split utilities can index them.
paths = np.array(image_file_names)
labels = np.array(labels)

In [4]:
# Two-stage stratified split: 70% training, 15% validation, 15% test.
# Both stages stratify on the action label and use the same fixed seed.
# NOTE(review): the split is made per file. If each file holds a single frame,
# frames of one video can end up in different splits -- confirm this is intended.

# Stage 1: keep 70% for training and hold out the remaining 30%.
stage_one = train_test_split(
    paths,
    labels,
    test_size=0.3,
    stratify=labels,
    random_state=42,
)
X_train, X_temp, y_train, y_temp = stage_one

# Stage 2: halve the hold-out into validation and test (15% each overall).
stage_two = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    stratify=y_temp,
    random_state=42,
)
X_val, X_test, y_val, y_test = stage_two


def _as_tensor(x):
    """Wrap NumPy arrays with `torch.from_numpy`; return any other input unchanged."""
    if isinstance(x, np.ndarray):
        return torch.from_numpy(x)
    return x


# Training pipeline: tensor conversion followed by random flips, each applied
# with probability 0.5.
# NOTE(review): the vertical flip turns frames upside down -- confirm that this
# augmentation is wanted for this data.
train_transform = transforms.Compose(
    [
        transforms.Lambda(_as_tensor),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
    ]
)

# Validation and test pipeline: tensor conversion only, no augmentation.
val_test_transform = transforms.Compose([transforms.Lambda(_as_tensor)])


# One dataset per split; only the training split receives the augmenting transform.
train_dataset, val_dataset, test_dataset = (
    KTHDataset(split_paths, split_labels, transform=split_transform)
    for split_paths, split_labels, split_transform in (
        (X_train, y_train, train_transform),
        (X_val, y_val, val_test_transform),
        (X_test, y_test, val_test_transform),
    )
)

# Class-balanced sampling for the training loader: every sample is weighted by
# the inverse frequency of its class in the training split.

# Map class names to integer indices (position in `action_space`).
class_to_idx = {cls_name: idx for idx, cls_name in enumerate(action_space)}
# Explicit integer dtype so an empty training split still yields a valid
# input for np.bincount.
y_train_indices = np.array(
    [class_to_idx[label] for label in y_train], dtype=np.int64
)

# `minlength` keeps one slot per class even when a class is absent from the
# training split, so the weight table always lines up with `action_space`.
class_sample_count = np.bincount(y_train_indices, minlength=len(action_space))
# Clamp to 1 so an absent class does not trigger a division by zero; its
# weight is never looked up because no training sample carries that class.
class_weights = 1. / np.maximum(class_sample_count, 1)
sample_weights = class_weights[y_train_indices]

# The split above is seeded; seed the sampler as well so the sequence of
# sampled training indices is reproducible from a fresh kernel.
sampler_generator = torch.Generator().manual_seed(42)
sampler = WeightedRandomSampler(
    torch.as_tensor(sample_weights, dtype=torch.double),
    num_samples=len(sample_weights),
    replacement=True,
    generator=sampler_generator,
)

In [5]:
# Batch the three splits with a common batch size.
# Training indices are drawn by `sampler`; the validation and test loaders
# iterate their datasets in order (no shuffling).
BATCH_SIZE = 32

train_loader = DataLoader(train_dataset, sampler=sampler, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)

In [6]:
# Smoke test: push a single training batch through an untrained autoencoder
# to check that the tensor shapes line up end to end.
test = AutoencoderModel(latent_dim=100).to(device)

# The batch labels are bound to a throwaway name so that this loop does not
# overwrite the notebook-level `labels` array that the split cell reads.
with torch.no_grad():  # shape check only, so no autograd graph is needed
    for images, _batch_labels in train_loader:
        print(images.shape)

        # Add a channel dimension at position 1: (B, H, W) -> (B, 1, H, W).
        image = images.unsqueeze(1).to(device)
        # Call the module itself rather than .forward() so registered hooks run.
        test(image)
        break  # Only the first batch is needed for this check.


torch.Size([32, 120, 160])
prior: torch.Size([32, 1, 120, 160])
encoded: torch.Size([32, 128, 15, 20])
latent: torch.Size([32, 100])
reconstructed: torch.Size([32, 1, 120, 160])


## TODO
1. AutoEncoder training
