# FESDModel

FESD - Fault estimation for skeleton detection - is a suite that aims at finding faults in joints of skeletons, which are detected by human pose estimatiors.

FESDData is the sister project to this notebook, which aims at recording depth and rgb data, as well as populating the data with human poses from variing human pose estimators.

Furthermore, FESTData augments all data based on joint confidence.

FFESDModel aims to develop and evaluate a model based on the faulty and augmented joint data as well as RGBD data.

## Libraries

We need a range of libraries which are imported here. We also define some constants.

In [1]:
import os
import json
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tqdm

import cv2
import pyrealsense2 as rs

sns.set_style()

In [2]:
RECORDING_DIR = Path('H:/Recordings/')

## Data Loading

Firstly we need to import all the recordings into the notebook.


### Load Metadata

It is important to load the metadata, such as the session parameters, the exercises and the recording paths.

In [3]:
recording_jsons = []
for file in os.listdir(RECORDING_DIR):
  if (file.endswith('.json')):
    with open(file=os.path.join(RECORDING_DIR, file), mode='r') as file:
      data = json.load(file)
      recording_jsons.append(data)

len(recording_jsons)

25

Then we load the Exercises from the exercise file.

In [4]:
with open(file="Exercises.json", mode='r') as file:
  exercises_json = json.load(file)['Exercises']

with open(file="JointErrors.json", mode='r') as file:
  joint_error_json = json.load(file)

with open(file="SkeletonErrors.json", mode='r') as file:
  skeleton_error_json = json.load(file)

len(exercises_json)

13

## Load Frame

Here we define the load frame functions. For now we focus on nuitrack recordings.

In [5]:
def id_2_name(i: int):
  return 'frame_' + str(i) + '.yml'

In [180]:
def load_skeletons(skeletons_json, flip: bool=False) -> (np.ndarray, np.ndarray, list[tuple[float, float, float]], list[tuple[float, float, float]]):
  poses = []
  pose_errors = []
  bounding_boxes_2d = [(np.inf, np.inf, np.inf), (0, 0, 0)]
  bounding_boxes_3d = [(np.inf, np.inf, np.inf), (0, 0, 0)]
  for person in skeletons_json:
    joints = np.ndarray(shape=[0, 6])
    errors = []    
    origin = person['Skeleton'][4]

    for joint in person['Skeleton']:
      if (joint['error'] != 1):
        bounding_boxes_2d[0] = np.minimum(bounding_boxes_2d[0], [joint['u'], joint['v'], joint['d']])
        bounding_boxes_2d[1] = np.maximum(bounding_boxes_2d[1], [joint['u'], joint['v'], joint['d']])
        bounding_boxes_3d[0] = np.minimum(bounding_boxes_3d[0], [joint['x'], joint['y'], joint['z']])
        bounding_boxes_3d[1] = np.maximum(bounding_boxes_3d[1], [joint['x'], joint['y'], joint['z']])
      joints = np.append(joints, [[
        joint['u'] - origin['u'],
        joint['v'] - origin['v'],
        joint['d'] - origin['d'],
        joint['x'] - origin['x'],
        joint['y'] - origin['y'],
        joint['z'] - origin['z']
      ]], axis=0) * (-1 if flip else 1)

      errors.append(1 if person['error'] == 1 else joint['error'])
              
    poses.append(joints)
    pose_errors.append(errors)
  
  print(bounding_boxes_2d)
  print(bounding_boxes_3d)

  return np.asarray(poses), np.asarray(pose_errors), bounding_boxes_2d, bounding_boxes_3d

In [195]:
def load_frame(session: json, frame_id: int, flip: bool=False, crop: bool=False, crop_random: bool=False, crop_pad: int=0) -> (np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray):
  frame_path = RECORDING_DIR /  session['Cameras'][0]['FileName'] / id_2_name(frame_id)
  frame_file = cv2.FileStorage(str(frame_path), cv2.FileStorage_READ)
  frame = np.asarray( frame_file.getNode('frame').mat()[:,:] )
  rgb, depth = np.split(frame, [3], axis=2)

  with open(file=RECORDING_DIR /  session['Skeleton'], mode='r') as file:
    skeleton_json = json.load(file)[frame_id]
  poses, errors, bounding_boxes_2d, bounding_boxes_3d = load_skeletons(skeleton_json, flip)
  pose_2d, pose_3d = np.split(poses, 2, axis=2)

  if (flip):
    rgb = np.flip(rgb, axis=1)
    depth = np.flip(depth, axis=1)

  if (crop):
    min_x = max(0, int(np.floor(bounding_boxes_2d[0][0])) - crop_pad)
    min_y = max(0, int(np.floor(bounding_boxes_2d[0][1])) - crop_pad)
    max_x = min(rgb.shape[0], int(np.ceil(bounding_boxes_2d[1][0])) + crop_pad)
    max_y = min(rgb.shape[1], int(np.ceil(bounding_boxes_2d[1][1])) + crop_pad)

    if (crop_random):
      min_x = np.random.randint(0, min_x)
      min_y = np.random.randint(0, min_y)
      max_x = np.random.randint(max_x, rgb.shape[0])
      max_y = np.random.randint(max_y, rgb.shape[1])
    
    rgb = rgb[min_x:max_x, min_y:max_y]
    depth = depth[min_x:max_x, min_y:max_y] 
  
  return rgb, depth, pose_2d, pose_3d, errors

In [200]:
rgb, depth, poses_2d, poses_3d, errors = load_frame(recording_jsons[0], 20, True, True, True, 10)
print(rgb.shape)
print(depth.shape)
print(poses_2d.shape)
print(poses_3d.shape)
print(errors.shape)

[array([287.79888916, 147.11325073,   2.45382833]), array([421.3939209 , 476.17895508,   2.87333059])]
[array([-0.13871242, -1.12756002,  2.45382833]), array([0.43398166, 0.37994158, 2.87333059])]
(215, 542, 3)
(215, 542, 1)
(1, 25, 3)
(1, 25, 3)
(1, 25)


In [157]:
poses_2d

array([[[ 3.49257782e+02,  3.04989929e+02,  2.61749840e+00],
        [-1.84570312e-01, -1.57876678e+02, -1.63670063e-01],
        [ 4.53582764e-01,  1.26848038e+02,  1.31840229e-01],
        [ 2.45849609e-01, -4.82750244e+01, -1.82793140e-02],
        [-0.00000000e+00, -0.00000000e+00, -0.00000000e+00],
        [-2.71636963e-01, -1.06412277e+02, -1.03270292e-01],
        [-4.05320740e+01,  1.04062683e+02,  9.98923779e-02],
        [ 5.69803772e+01, -4.21243286e+01, -3.98993492e-03],
        [-6.95727234e+01, -1.72396545e+01,  4.21814919e-02],
        [ 7.21361389e+01,  2.93243713e+01, -4.98199463e-02],
        [ 3.49257782e+02,  3.04989929e+02,  2.61749840e+00],
        [-2.71636963e-01, -1.06412277e+02, -1.03270292e-01],
        [ 4.05550842e+01,  1.02272476e+02,  8.75868797e-02],
        [-5.27978516e+01, -3.88483887e+01,  5.90515137e-03],
        [ 5.99971619e+01, -2.09994202e+01,  2.67663002e-02],
        [-6.14588928e+01,  3.31505737e+01, -3.33003998e-02],
        [ 3.49257782e+02

## Train Model

In the following we define the training function and train a network on the training data.

In [None]:
from RD3D import train

In [None]:
# training
def train_salient(train_loader, model, optimizer, criterion, scheduler, epoch, opt):
    # multi-scale training  
    size_rates = [0.75, 1, 1.25]

    model.train()
    loss_record = AvgMeter()
    for i, pack in enumerate(train_loader, start=1):
        for rate in size_rates:
            optimizer.zero_grad()
            images, gts, depths = pack
            images = images.cuda()
            gts = gts.cuda()
            depths = depths.cuda()

            # multi-scale training samples
            trainsize = int(round(opt.trainsize * rate / 32) * 32)
            if rate != 1:
                images = F.upsample(images, size=(trainsize, trainsize), mode='bilinear', align_corners=True)
                images = images.unsqueeze(2)
                gts = F.upsample(gts, size=(trainsize, trainsize), mode='bilinear', align_corners=True)

                depths = F.upsample(depths, size=(trainsize, trainsize), mode='bilinear', align_corners=True)
                depths = depths.unsqueeze(2)
                images = torch.cat([images, depths], 2)

            if rate == 1:
                images = images.unsqueeze(2)
                depths = depths.unsqueeze(2)
                images = torch.cat([images, depths], 2)

            # forward
            pred_s = model(images)
            # TODO Calculate different loss based on the error label
            loss = criterion(pred_s, gts)

            loss.backward()
            clip_gradient(optimizer, opt.clip)
            optimizer.step()
            scheduler.step()
            if rate == 1:
                loss_record.update(loss.data, opt.batchsize)

        if i % 100 == 0 or i == len(train_loader):
            logger.info('Epoch [{:03d}/{:03d}], Step [{:04d}/{:04d}], Loss: {:.4f}'.
                        format(epoch, opt.epochs, i, len(train_loader),
                               loss_record.show()))