In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from pose_dataset import PoseData

In [27]:
INPUT_SIZE = 33 * 3
HIDDEN_SIZE = 64
NUM_LAYERS = 2 # number of RNNs to stack
NUM_CLASSES = 9 # number of categories

LEARNING_RATE = 0.003

In [28]:
TIME_DIM = 1
BATCH_DIM = 0
COORD_DIM = 2

In [29]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [30]:
class PoseScoringModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(PoseScoringModel, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, X):
        h0 = torch.zeros(self.num_layers, X.size(BATCH_DIM), self.hidden_size).to(device)
        out, _ = self.gru(X, h0)
        # out: batch x time x hidden
        out = out[:, -1, :]
        # out: batch x hidden
        out = self.fc(out)
        return out

In [31]:
model = PoseScoringModel(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, NUM_CLASSES)

In [32]:
loss = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)

In [33]:
NUM_WORKERS = 2

In [34]:
import os
import glob
import shutil
import random

# Set random seed for reproducibility (optional)
# random.seed(42)

# Set the training ratio (90% training, 10% testing)
train_ratio = 0.9

# Define target directories
train_dir = "../data/train"
test_dir = "../data/test"

# Create directories if they do not exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Get a sorted list of all data files
data_files = sorted(glob.glob("../data/data_*.mat"))
data_files = data_files

for data_file in data_files:
    # Get the base filename without extension, e.g., "data_000"
    base_name = os.path.splitext(os.path.basename(data_file))[0]
    parts = base_name.split('_')
    if len(parts) != 2:
        print(f"Skipping invalid file name: {data_file}")
        continue
    number = parts[1]  # e.g., "000"
    
    # Construct the corresponding metric filename
    metric_file = f"../data/metric_{number}.mat"
    
    # Check if the corresponding metric file exists
    if not os.path.exists(metric_file):
        print(f"Warning: {metric_file} not found for {data_file}")
        continue

    # Choose destination based on random split
    destination = train_dir if random.random() < train_ratio else test_dir

    # Move both files
    shutil.move(data_file, os.path.join(destination, os.path.basename(data_file)))
    shutil.move(metric_file, os.path.join(destination, os.path.basename(metric_file)))
    
    print(f"Moved pair ({data_file}, {metric_file}) to {destination}")

In [35]:
train_data = PoseData("../data/train")
test_data = PoseData("../data/test")

train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=32, shuffle=True)
# train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
# test_dataloader = DataLoader(test_data, batch_size=32, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)

In [36]:
import os
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, Subset
from torchmetrics.classification import MulticlassF1Score

EPOCHS = 50  # Number of epochs for training
model.train()  # Set the model to training mode

# Create a directory to save snapshots if it doesn't exist
os.makedirs("snapshots", exist_ok=True)
# Number of splits for k-fold cross-validation
k_folds = 5
batch_size = 32
kf = KFold(n_splits=k_folds, shuffle=True)

indices = list(range(len(train_data)))

# Convert dataset to a tensor for splitting
for fold, (train_idx, val_idx) in enumerate(kf.split(indices)):
    print(f"Fold {fold + 1}/{k_folds}")

    train_subset = Subset(train_data, train_idx)
    val_subset = Subset(train_data, val_idx)
    
    # Create dataloaders for these subsets
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)
    
    
    for epoch in range(EPOCHS):
        model.train()
        for i, (inputs, targets) in enumerate(train_loader):
            if inputs.shape[0] != 32:
                continue
            inputs = inputs[:, :, :, :3].flatten(2)

            # Forward pass
            outputs = model(inputs)
            targets = targets.squeeze(1)


            loss_value = loss(outputs, targets)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()

            if i % 10 == 0:
                print(f"Fold [{fold + 1}/{k_folds}], Epoch [{epoch + 1}/{EPOCHS}], "
                      f"Step [{i + 1}/{len(train_loader)}], Loss: {loss_value.item():.4f}")

        # Evaluate on validation set
        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        f1_metric = MulticlassF1Score(num_classes=9, average='micro').to(device)

        with torch.no_grad():
            for inputs, targets in val_loader:
                if inputs.shape[0] != 32:
                    continue
                inputs = inputs.to(device)
                inputs = inputs[:, :, :, :3].flatten(2)

                targets = targets.to(device)  # shape: [batch_size, num_labels], values ∈ {0, 1}
                targets = targets.squeeze(1)

                print(f"TARGETS {targets.shape}, INPUTS{inputs.shape}")

                logits = model(inputs)        # shape: [batch_size, num_labels]
                preds = torch.sigmoid(logits) > 0.5  # threshold predictions → bool

                f1_metric.update(preds.int(), targets.int())
                
        f1_score = f1_metric.compute()
        print(f"F1-score (micro): {f1_score.item():.4f}")
        f1_metric.reset()
        # Save a snapshot of the model at each epoch
        torch.save(model.state_dict(), f"snapshots/model_fold{fold + 1}_epoch{epoch + 1}.pth")

Fold 1/5
Fold [1/5], Epoch [1/50], Step [1/5], Loss: 0.6988
TARGETS torch.Size([32, 9]), INPUTStorch.Size([32, 36, 99])
F1-score (micro): 0.6319
Fold [1/5], Epoch [2/50], Step [1/5], Loss: 0.6883
TARGETS torch.Size([32, 9]), INPUTStorch.Size([32, 36, 99])
F1-score (micro): 0.6354
Fold [1/5], Epoch [3/50], Step [1/5], Loss: 0.6730
TARGETS torch.Size([32, 9]), INPUTStorch.Size([32, 36, 99])
F1-score (micro): 0.6944
Fold [1/5], Epoch [4/50], Step [1/5], Loss: 0.6525
TARGETS torch.Size([32, 9]), INPUTStorch.Size([32, 36, 99])
F1-score (micro): 0.8021
Fold [1/5], Epoch [5/50], Step [1/5], Loss: 0.5669
TARGETS torch.Size([32, 9]), INPUTStorch.Size([32, 36, 99])
F1-score (micro): 0.8021
Fold [1/5], Epoch [6/50], Step [1/5], Loss: 0.5288
TARGETS torch.Size([32, 9]), INPUTStorch.Size([32, 36, 99])
F1-score (micro): 0.7882
Fold [1/5], Epoch [7/50], Step [1/5], Loss: 0.6072
TARGETS torch.Size([32, 9]), INPUTStorch.Size([32, 36, 99])
F1-score (micro): 0.7847
Fold [1/5], Epoch [8/50], Step [1/5], L

In [38]:
model.eval()  # Set the model to evaluation mode
correct = 0
total = 0
eval_loss = 0

f1_metric = MulticlassF1Score(num_classes=9, average='micro').to(device)

with torch.no_grad():  # Disable gradient computation for evaluation
    for inputs, targets in test_dataloader:
        inputs = inputs.to(device)
        inputs = inputs[:, :, :, :3].flatten(2)

        targets = targets.to(device)  # shape: [batch_size, num_labels], values ∈ {0, 1}
        targets = targets.squeeze(1)
        logits = model(inputs)        # shape: [batch_size, num_labels]
        preds = torch.sigmoid(logits) > 0.5  # threshold predictions → bool

        f1_metric.update(preds.int(), targets.int())

# Compute final F1
f1_score = f1_metric.compute()
print(f"F1-score (micro): {f1_score.item():.4f}")

# Reset for next epoch
f1_metric.reset()

F1-score (micro): 0.8562


In [39]:
torch.save(model.state_dict(), "../model/model.pt")