In [1]:
import os

import torch

import model.classifiers as classifiers
import model.transforms as transforms

import utils.utils_mediapipe as utils_mediapipe
from config import DATA_CONFIG, TRAIN_CONFIG

In [2]:
# Which training run to evaluate, and the device inference runs on.
exp_id = 1
device = 'cpu'

# Gesture lookups from the training config. `label_map` is indexed by gesture
# name further down; `inv_label_map` is presumably the reverse mapping
# (TODO confirm against config).
label_map = TRAIN_CONFIG.gesture_set.label_map
inv_label_map = TRAIN_CONFIG.gesture_set.inv_label_map

# Folder holding the labeled point samples (.npy files).
samples_folder = DATA_CONFIG.mediapipe.points_unified_world_filtered_labeled

# Checkpoint location: <output_data>/experiment_<zero-padded id>/checkpoint.pth
experiment_dir = f'experiment_{str(exp_id).zfill(3)}'
checkpoint_path = os.path.join(
    TRAIN_CONFIG.train_params.output_data, experiment_dir, 'checkpoint.pth'
)

In [3]:
# Preprocessing parameters come from the training config so evaluation uses
# the same settings the transforms were configured with there.
to_keep = TRAIN_CONFIG.transforms_params.to_keep
shape_limit = TRAIN_CONFIG.transforms_params.shape_limit

# Transform applied to the input points of a sample.
test_transforms = transforms.TestTransforms(
    to_keep=to_keep,
    shape_limit=shape_limit,
    device=device,
)
# Transform applied to the label column of a sample.
label_transforms = transforms.LabelsTransforms(
    shape_limit=shape_limit,
    device=device,
)

# Input size is the sum over `to_keep`; output size is one class per entry in
# `label_map`.
model = classifiers.LSTMClassifier(sum(to_keep), len(label_map))
model.to(device)

# The checkpoint is only used as a state dict, so restrict unpickling to
# tensors and plain containers (`weights_only=True`, available in
# torch >= 1.13) instead of allowing arbitrary pickled objects to execute.
state_dict = torch.load(
    checkpoint_path,
    map_location=device,
    weights_only=True,
)
model.load_state_dict(state_dict)

# Switch to evaluation mode; `eval()` returns the module, so the cell still
# displays the model summary.
model.eval()

LSTMClassifier(
  (positional_embeddings): PositionalEncoding()
  (linear1): Linear(in_features=42, out_features=256, bias=True)
  (lstm1): LSTM(256, 256, num_layers=2, batch_first=True)
  (linear2): Linear(in_features=256, out_features=6, bias=True)
)

In [5]:
# Select a single recording; these four values determine the file name.
subject, trial = 101, 1
gesture, hand = 'select', 'left'

file_name = f'G{subject}_{gesture}_{hand}_trial{trial}.npy'
file_path = os.path.join(samples_folder, file_name)

data = utils_mediapipe.load_points(file_path)

# Every column except the last is fed to the model. The last column is scaled
# by the gesture's class index to build the reference labels -- presumably it
# is a 0/1 "gesture active" mask (TODO confirm against the data format).
points = test_transforms(data[:, :-1])
labels = label_transforms(data[:, -1] * label_map[gesture])

In [6]:
# Forward pass on the transformed sample. Gradient tracking is disabled
# because this notebook only evaluates the model and never calls backward.
with torch.no_grad():
    prediction = model(points)

RuntimeError: The size of tensor a (30) must match the size of tensor b (42) at non-singleton dimension 1

In [8]:
# Reduce over the last axis: the maximum value and the index where it occurs.
# NOTE(review): `prediction_probs` holds the model's raw maximum outputs; they
# are probabilities only if the model's forward already normalizes them --
# confirm before interpreting them as such.
prediction_probs, prediction_labels = torch.max(prediction, dim=-1)
prediction_labels

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 4, 4, 4, 4,
        4, 1, 1, 1, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [9]:
# Reference labels built from the sample's last column, displayed for a
# side-by-side comparison with `prediction_labels`.
labels

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [10]:
# Fraction of positions where the predicted class equals the reference label.
n_correct = prediction_labels.eq(labels).sum()
accuracy = n_correct / labels.shape[0]
f'{accuracy.item():.2%}'

'79.13%'