#### Test on Batch data

In [1]:
import os

import torch

import model.classifiers as classifiers
import model.transforms as transforms

import utils.utils_mediapipe as utils_mediapipe
from config import DATA_CONFIG, TRAIN_CONFIG

In [2]:
# Which trained experiment to evaluate and where to run inference.
exp_id = 1
device = 'cpu'

# The checkpoint is stored under <output_data>/experiment_<3-digit id>/checkpoint.pth.
experiment_dir = f'experiment_{exp_id:03d}'
checkpoint_path = os.path.join(
    TRAIN_CONFIG.train_params.output_data, experiment_dir, 'checkpoint.pth'
)

# Folder holding the labelled sample files used for the batch test.
samples_folder = DATA_CONFIG.mediapipe.points_unified_world_filtered_labeled

# Label lookup tables taken from the training configuration.
gesture_set = TRAIN_CONFIG.gesture_set
label_map = gesture_set.label_map
inv_label_map = gesture_set.inv_label_map

In [3]:
# Transform settings are read from the training configuration.
transforms_params = TRAIN_CONFIG.transforms_params
to_keep = transforms_params.to_keep
shape_limit = transforms_params.shape_limit

# One transform for the point data, one for the labels.
test_transforms = transforms.TestTransforms(to_keep=to_keep, shape_limit=shape_limit, device=device)
label_transforms = transforms.LabelsTransforms(shape_limit=shape_limit, device=device)

# Build the classifier from sum(to_keep) and the number of labels,
# then restore the trained weights.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
model = classifiers.LSTMClassifier(sum(to_keep), len(label_map))
model.to(device)
state_dict = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(state_dict)
# Last expression: switches to eval mode and displays the model summary.
model.eval()

LSTMClassifier(
  (positional_embeddings): PositionalEncoding()
  (linear1): Linear(in_features=30, out_features=256, bias=True)
  (lstm1): LSTM(256, 256, num_layers=2, batch_first=True)
  (linear2): Linear(in_features=256, out_features=3, bias=True)
)

In [4]:
# Recording to evaluate; these four values are combined into the file name.
subject = 101
gesture = 'select'
hand = 'left'
trial = 1

sample_name = f'G{subject}_{gesture}_{hand}_trial{trial}.npy'
file_path = os.path.join(samples_folder, sample_name)

data = utils_mediapipe.load_points(file_path)

# Every column except the last goes through the point transform.
# The last column is multiplied by the gesture's label id before the label
# transform (presumably a per-frame 0/1 gesture flag - TODO confirm).
point_columns = data[:, :-1]
label_column = data[:, -1]
points = test_transforms(point_columns)
labels = label_transforms(label_column * label_map[gesture])

In [5]:
# Forward pass on the transformed points; gradient tracking is disabled
# because this is inference only.
with torch.no_grad():
    prediction = model(points)

In [6]:
# Take the maximum over the last dimension: the values and their indices.
# The indices are used as the predicted labels.
# NOTE(review): `prediction_probs` holds the maxima of the model output; these
# are only probabilities if the model applies a softmax - confirm.
best_scores = prediction.max(dim=-1)
prediction_probs = best_scores.values
prediction_labels = best_scores.indices
prediction_labels

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [7]:
# Display the transformed ground-truth labels for comparison with the predictions above.
labels

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [8]:
# Accuracy: fraction of positions where prediction and label agree.
num_correct = (prediction_labels == labels).sum()
accuracy = num_correct / len(labels)
# Last expression: accuracy rendered as a percentage with two decimals.
f'{accuracy.item():.2%}'

'100.00%'

#### Test on Streaming data

In [22]:
import itertools
import json
import os

import cv2
import pyk4a
import mediapipe as mp
import numpy as np
import torch

import model.classifiers as classifiers
import model.transforms as transforms

import utils.utils_camera_systems as utils_camera_systems
import utils.utils_kalman_filter as utils_kalman_filter
import utils.utils_mediapipe as utils_mediapipe
import utils.utils_unified_format as utils_unified_format
from config import DATA_CONFIG, TRAIN_CONFIG, KALMAN_FILTER_CONFIG

In [23]:
# Which trained experiment to evaluate and where to run inference.
exp_id = 1
device = 'cpu'

# The checkpoint is stored under <output_data>/experiment_<3-digit id>/checkpoint.pth.
experiment_dir = f'experiment_{exp_id:03d}'
checkpoint_path = os.path.join(
    TRAIN_CONFIG.train_params.output_data, experiment_dir, 'checkpoint.pth'
)

# Folder holding the recorded stream used for the streaming test.
samples_folder = DATA_CONFIG.streaming.stream_1

# Label lookup tables taken from the training configuration.
gesture_set = TRAIN_CONFIG.gesture_set
label_map = gesture_set.label_map
inv_label_map = gesture_set.inv_label_map

In [24]:
# MediaPipe Holistic solver used to extract pose and hand landmarks per frame.
mp_holistic = mp.solutions.holistic

# Settings are kept in a dict so they can be inspected or reused later.
mp_solver_settings = {
    'static_image_mode': False,
    'model_complexity': 2,
    'min_detection_confidence': 0.5,
    'min_tracking_confidence': 0.5,
}
mp_solver = mp_holistic.Holistic(**mp_solver_settings)

In [25]:
# Window size handed to the depth extractor.
WINDOW_SIZE = 7

# NOTE(review): KALMAN_PARAMS is not used again in the visible cells; the loop
# below fetches a fresh dict per filter instead.
KALMAN_PARAMS = KALMAN_FILTER_CONFIG.init_params.as_dict()
KALMAN_HEURISTICS_FUNC = KALMAN_FILTER_CONFIG.heuristics.as_dict()

CAMERA_PARAMS_PATH = os.path.join(samples_folder, 'calibration_fake.json')

# Camera model and depth helper built from the calibration file.
image_size, intrinsic = utils_camera_systems.get_camera_params(CAMERA_PARAMS_PATH)
camera_systems = utils_camera_systems.CameraSystems(image_size, intrinsic)
depth_extractor = utils_camera_systems.DepthExtractor(WINDOW_SIZE)

# One Kalman filter per unified-format point.
# NOTE(review): every index >= 18 reuses entry 4 of `sigma_u_points`; the
# meaning of 18 and 4 is not visible here - confirm and name these constants.
kfs = []
for point_idx in range(utils_unified_format.TOTAL_POINTS_COUNT):
    sigma_idx = 4 if point_idx >= 18 else point_idx
    # A fresh dict per filter, because it is modified below.
    filter_params = KALMAN_FILTER_CONFIG.init_params.as_dict()
    sigma_u = filter_params.pop('sigma_u_points')[sigma_idx]
    filter_params['sigma_u'] = sigma_u
    # Scale a copy of the initial Q by sigma_u squared.
    filter_params['init_Q'] = np.copy(filter_params['init_Q']) * (sigma_u ** 2)
    kfs.append(
        utils_kalman_filter.KalmanFilter(**filter_params, **KALMAN_HEURISTICS_FUNC)
    )
kalman_filters = utils_kalman_filter.KalmanFilters(kfs)

In [26]:
# Which inputs to keep, as configured for training.
to_keep = TRAIN_CONFIG.transforms_params.to_keep

# Streaming variant of the test transform (takes no shape limit).
test_transforms = transforms.TestStreamTransforms(to_keep=to_keep, device=device)

# Build the classifier from sum(to_keep) and the number of labels,
# then restore the trained weights.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
model = classifiers.LSTMClassifier(sum(to_keep), len(label_map))
model.to(device)
state_dict = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(state_dict)
# Last expression: switches to eval mode and displays the model summary.
model.eval()

LSTMClassifier(
  (positional_embeddings): PositionalEncoding()
  (linear1): Linear(in_features=30, out_features=256, bias=True)
  (lstm1): LSTM(256, 256, num_layers=2, batch_first=True)
  (linear2): Linear(in_features=256, out_features=3, bias=True)
)

In [20]:
# Colour and depth recordings sit side by side in the stream folder.
color_stream_file = os.path.join(samples_folder, 'color.mkv')
depth_stream_file = os.path.join(samples_folder, 'depth.mkv')

In [9]:
# Replay the recorded colour and depth streams frame by frame, extract
# MediaPipe landmarks from each colour frame and convert them to screen
# coordinates. Depth extraction and Kalman filtering are currently disabled
# (see the commented-out section inside the loop).
color_cap = cv2.VideoCapture(color_stream_file)
depth_cap = cv2.VideoCapture(depth_stream_file)

mp_solver.reset()
predicted = None

# try/finally guarantees the captures are released and windows closed even
# when processing a frame raises.
try:
    while color_cap.isOpened() and depth_cap.isOpened():
        color_ret, color_image = color_cap.read()
        depth_ret, depth_image = depth_cap.read()
        # Stop as soon as either stream runs out of frames.
        if not (color_ret and depth_ret):
            break

        # NOTE(review): on a 3-D (H, W, C) array `.T` reverses all three axes;
        # confirm this is the layout the depth extractor expects.
        depth_image = depth_image.T

        ### MediaPipe Extractor
        ### ------------------------------
        # OpenCV decodes frames as BGR, while MediaPipe expects RGB input.
        rgb_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
        landmarks = mp_solver.process(rgb_image)
        # Fall back to the EMPTY_* placeholders for parts that were not detected.
        joined_landmarks = itertools.chain(
            landmarks.pose_landmarks.landmark if landmarks.pose_landmarks is not None else utils_mediapipe.EMPTY_POSE,
            landmarks.left_hand_landmarks.landmark if landmarks.left_hand_landmarks is not None else utils_mediapipe.EMPTY_HAND,
            landmarks.right_hand_landmarks.landmark if landmarks.right_hand_landmarks is not None else utils_mediapipe.EMPTY_HAND,
        )
        frame_points = utils_mediapipe.landmarks_to_array(joined_landmarks)[:, :3]
        mp_points = frame_points.reshape(-1)

        ### Filtration
        ### ------------------------------
        # Convert from the MediaPipe point layout to the unified point layout.
        mp_points = utils_mediapipe.mediapipe_to_unified(
            mp_points.reshape(-1, utils_mediapipe.TOTAL_POINTS_COUNT, 3)
        ).reshape(-1, 3 * utils_unified_format.TOTAL_POINTS_COUNT)

        # One row of three values per point.
        frame_points = mp_points.reshape(-1, 3)
        frame_points = camera_systems.zero_points_outside_screen(
            frame_points,
            is_normalized=True,
            inplace=True,
        )
        frame_points = camera_systems.normalized_to_screen(
            frame_points,
            inplace=True,
        )

        # Disabled: depth lookup, Kalman filtering and screen-to-world conversion.
        # NOTE(review): the disabled code references `tp`, `color_frame` and
        # `depth_frame`, none of which are defined in the visible cells.
        #
        # depths = depth_extractor.get_depth_in_window(
        #     depth_image,
        #     frame_points,
        #     predicted,
        # )

        # if predicted is None:
        #     kalman_filters.reset([
        #         np.array([[point], [0]])
        #         for point in depths
        #     ])
        # depths_filtered = kalman_filters.update(
        #     depths,
        #     use_heuristic=True,
        #     projection=0,
        # )

        # predicted = kalman_filters.predict(projection=0)
        # depths_filtered = tp.cast(tp.List[float], depths_filtered)
        # predicted = tp.cast(tp.List[float], predicted)

        # frame_points[:, 2] = depths_filtered
        # frame_points = camera_systems.screen_to_world(
        #     frame_points,
        #     inplace=True,
        # )

        # cv2.imshow('Color Frame', color_frame)
        # cv2.imshow('Depth Frame', depth_frame)

        # if cv2.waitKey(25) & 0xFF == ord('q'):
        #     break
finally:
    color_cap.release()
    depth_cap.release()

    cv2.destroyAllWindows()

IndexError: index 627 is out of bounds for axis 0 with size 576

In [27]:
# Combined recording that is opened with the pyk4a playback API below.
filename = os.path.join(samples_folder, 'output.mkv')

In [28]:
playback = pyk4a.PyK4APlayback(filename)

# Read the raw calibration from the recording. The handle is closed in a
# `finally` so a failure here does not leave the playback open, which would
# break the later `playback.open()` call.
playback.open()
try:
    calib = playback.calibration_raw
    # NOTE(review): if `calibration_raw` already returns a JSON string,
    # json.dumps will quote it a second time - confirm whether json.loads
    # (or no conversion) was intended.
    calib_json = json.dumps(calib)
finally:
    playback.close()

In [30]:
# Step through every capture in the recording until the end of the file.
# The handle is closed in a `finally` so it is released even if reading a
# capture fails with something other than EOFError.
playback.open()
try:
    while True:
        try:
            frame = playback.get_next_capture()
        except EOFError:
            # End of the recording reached.
            break
finally:
    playback.close()