In [1]:
import json
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg 
import trt_pose.coco
import math
import os
import numpy as np
import traitlets
import pickle 


In [2]:
# Load the hand-pose keypoint/skeleton description and derive the trt_pose topology from it.
with open('preprocess/hand_pose.json', 'r') as f:
    hand_pose = json.load(f)

topology = trt_pose.coco.coco_category_to_topology(hand_pose)
import trt_pose.models

num_parts = len(hand_pose['keypoints'])
num_links = len(hand_pose['skeleton'])

model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()
import torch


WIDTH = 224
HEIGHT = 224
# All-zero example input handed to torch2trt when the engine has to be built.
data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()

# Plain PyTorch checkpoint (conversion input) and TensorRT-optimized checkpoint (conversion
# output). These must be two different files: the original code pointed MODEL_WEIGHTS at the
# *_trt.pth file inside the "that file does not exist" branch, so the conversion could never run.
MODEL_WEIGHTS = 'model/hand_pose_resnet18_att_244_244.pth'
OPTIMIZED_MODEL = 'model/hand_pose_resnet18_att_244_244_trt.pth'

if not os.path.exists(OPTIMIZED_MODEL):
    # One-time conversion: load the PyTorch weights, convert with torch2trt, save the result.
    model.load_state_dict(torch.load(MODEL_WEIGHTS))
    import torch2trt
    model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25)
    torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)


from torch2trt import TRTModule

# Always run inference through the saved TensorRT module.
model_trt = TRTModule()
model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))



[08/19/2023-15:40:19] [TRT] [W] Using an engine plan file across different models of devices is not recommended and is likely to affect performance or even cause errors.


<All keys matched successfully>

In [3]:
from trt_pose.draw_objects import DrawObjects
from trt_pose.parse_objects import ParseObjects

# Helper used later to draw the parsed detections onto a frame.
draw_objects = DrawObjects(topology)

# Helper used later to turn the network's (cmap, paf) outputs into (counts, objects, peaks).
parse_objects = ParseObjects(
    topology,
    link_threshold=0.15,
    cmap_threshold=0.15,
)

In [4]:

import torchvision.transforms as transforms
import PIL.Image

# Device on which preprocess() places image tensors.
device = torch.device('cuda')

# Per-channel normalization constants applied in preprocess(); kept on the GPU so the
# in-place sub_/div_ there operate on tensors that live on the same device.
mean = torch.Tensor([0.485, 0.456, 0.406]).to(device)
std = torch.Tensor([0.229, 0.224, 0.225]).to(device)

def preprocess(image):
    """Turn a BGR frame into a normalized, batched tensor on ``device``.

    Parameters
    ----------
    image : array
        Frame in BGR channel order, as accepted by ``cv2.cvtColor``.

    Returns
    -------
    torch.Tensor
        The frame converted to RGB, passed through ``to_tensor``, moved to the
        module-level ``device``, normalized in place with the module-level
        ``mean`` / ``std``, and given a leading batch axis of size 1.
    """
    # The module-level `device` is only read here. The previous version declared it
    # `global` and rebound it to the same value on every call, which was redundant.
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pil_image = PIL.Image.fromarray(rgb)
    tensor = transforms.functional.to_tensor(pil_image).to(device)
    # Broadcast the per-channel constants over the two trailing axes.
    tensor.sub_(mean[:, None, None]).div_(std[:, None, None])
    return tensor[None, ...]

In [5]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
# Gesture classifier: feature standardization followed by an RBF-kernel SVM.
clf = make_pipeline(
    StandardScaler(),
    SVC(kernel='rbf', gamma='auto'),
)

In [6]:
from preprocessdata import preprocessdata
# Build the helper object from the topology and keypoint count.
# NOTE: this rebinds the name `preprocessdata` from the imported callable to the object it
# returns; later cells call methods on that object (joints_inference, find_distance, ...).
# The import must therefore be re-run before this line can be executed again.
preprocessdata = preprocessdata(topology, num_parts)

In [7]:
from dataloader import dataloader
# Root of the hand-pose task directory. The default is the original hard-coded location;
# set the HAND_POSE_ROOT environment variable to run the notebook from another checkout
# or machine without editing this cell.
HAND_POSE_ROOT = os.environ.get('HAND_POSE_ROOT', '/home/xigong/trt_pose/tasks/hand_pose')
path = os.path.join(HAND_POSE_ROOT, 'data_collection')
# Label files are given with a leading "/" exactly as before.
# NOTE(review): presumably dataloader appends these to `path` — confirm in dataloader.py.
train_label = "/hand_dataset_train_nis/hand_dataset_train_nis.json"
test_label = "/hand_dataset_test_nis/hand_dataset_test_nis.json"
hand = dataloader(path, train_label, test_label)

Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.
Input image exists.


In [8]:
def data_preprocess(images):
    """Compute the joint-distance feature vector for every image.

    Each image has its second axis reversed, is run through ``preprocess`` and
    ``model_trt``, and the parsed detections are converted to joints and then to
    distances via ``preprocessdata.find_distance``.

    Parameters
    ----------
    images : iterable of arrays
        Images indexable as ``im[:, ::-1, :]``.
        NOTE(review): presumably (H, W, C) BGR frames, which would make the
        reversal a horizontal mirror — confirm against dataloader.

    Returns
    -------
    list
        One ``find_distance`` result per input image, in input order.
    """
    def _features_for(frame):
        # Reverse the second axis before inference, as the original loop did.
        mirrored = frame[:, ::-1, :]
        cmap, paf = model_trt(preprocess(mirrored))
        cmap = cmap.detach().cpu()
        paf = paf.detach().cpu()
        counts, objects, peaks = parse_objects(cmap, paf)
        joints = preprocessdata.joints_inference(mirrored, counts, objects, peaks)
        return preprocessdata.find_distance(joints)

    return [_features_for(frame) for frame in images]

In [9]:
# Call hand.smaller_dataset on the training images with the arguments 100 and 6.
# NOTE(review): the returned train_images / labels_train are not read by the later cells in
# this notebook, which use hand.train_images / hand.labels_train directly. Confirm whether
# this call is still needed (for example for a side effect on `hand`).
train_images, labels_train = hand.smaller_dataset(hand.train_images,100,6)

In [10]:
# Feature extraction for both splits, training images first and then test images.
# This reads the full hand.train_images / hand.test_images collections.
joints_train, joints_test = map(data_preprocess, (hand.train_images, hand.test_images))

In [11]:
# True: fit the classifier on the extracted features and pickle it.
# False: skip training and load a previously pickled classifier.
svm_train = True
if svm_train:
    clf, predicted = preprocessdata.trainsvm(clf, joints_train, joints_test, hand.labels_train, hand.labels_test)
    filename = 'svmmodel_plus.sav'
    # Context manager guarantees the file is flushed and closed even if pickling fails.
    with open(filename, 'wb') as model_file:
        pickle.dump(clf, model_file)
else:
    # NOTE(review): this branch loads 'svmmodel.sav' while the training branch writes
    # 'svmmodel_plus.sav' — confirm the two different filenames are intentional.
    filename = 'svmmodel.sav'
    # pickle.load can execute arbitrary code; only load model files you trust.
    with open(filename, 'rb') as model_file:
        clf = pickle.load(model_file)

In [12]:
# Score the classifier: predictions for the test features are passed to
# preprocessdata.svm_accuracy together with the test labels; the result is the cell output.
preprocessdata.svm_accuracy(clf.predict(joints_test), hand.labels_test)


0.9028436018957346

In [13]:
# Spot check: predict for test sample 40 plus an all-zero row of length num_parts * num_parts.
# NOTE(review): the zero row presumably has the same length as a feature vector and stands in
# for "no hand detected" — confirm against preprocessdata.find_distance.
clf.predict([joints_test[40],[0]*num_parts*num_parts])

array([1, 6])

In [14]:
# Predicted labels for every test feature vector, displayed in full as the cell output.
clf.predict(joints_test)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 3, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 2, 3,
       3, 3, 3, 3, 3, 3, 3, 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 6, 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 3, 4, 6, 4, 4, 4, 4, 4, 6, 4, 4, 4, 4, 4, 4, 4, 4, 6, 3,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 6, 6, 4, 4,
       4, 4, 4, 4, 4, 6, 6, 4, 4, 4, 4, 4, 4, 6, 4, 4, 4, 4, 1, 5, 5, 1,
       5, 1, 5, 5, 5, 5, 4, 5, 5, 5, 4, 1, 1, 1, 5,

In [15]:
from jetcam.usb_camera import USBCamera
from jetcam.csi_camera import CSICamera
from jetcam.utils import bgr8_to_jpeg

# Open the USB camera at the same WIDTH x HEIGHT used for the model's example input tensor.
camera = USBCamera(width=WIDTH, height=HEIGHT)
# Alternative for a CSI camera; swap with the line above to use it.
#camera = CSICamera(width=WIDTH, height=HEIGHT, capture_fps=30)

# NOTE(review): setting `running` presumably starts continuous capture so that camera.value
# keeps updating for the observer attached later — confirm against the jetcam documentation.
camera.running = True



In [16]:
import ipywidgets
from IPython.display import display


# JPEG widget that execute() writes annotated frames into. It is sized from WIDTH / HEIGHT
# rather than a repeated literal 224, so it stays in sync with the camera resolution and the
# model input size if those constants are ever changed.
image_w = ipywidgets.Image(format='jpeg', width=WIDTH, height=HEIGHT)
display(image_w)

Image(value=b'', format='jpeg', height='224', width='224')

In [17]:
# Root of the hand-pose task directory. The default is the original hard-coded location;
# set the HAND_POSE_ROOT environment variable to load gesture.json from another checkout.
HAND_POSE_ROOT = os.environ.get('HAND_POSE_ROOT', '/home/xigong/trt_pose/tasks/hand_pose')
with open(os.path.join(HAND_POSE_ROOT, 'preprocess', 'gesture.json'), 'r') as f:
    gesture = json.load(f)
# Value stored under "classes_nis"; execute() passes it to preprocessdata.print_label.
gesture_type = gesture["classes_nis"]

In [18]:
def execute(change):
    """Process one camera frame: detect the hand, classify the gesture, update the widget.

    Parameters
    ----------
    change : dict
        Change notification whose ``'new'`` entry is the frame to process.
        The frame is drawn on in place before being encoded.

    Side effects
    ------------
    Appends the newest prediction to ``preprocessdata.prev_queue`` and removes its
    first element, then writes the JPEG-encoded annotated frame to ``image_w.value``.
    """
    frame = change['new']

    # Pose network -> CPU tensors -> parsed detections.
    cmap, paf = model_trt(preprocess(frame))
    cmap = cmap.detach().cpu()
    paf = paf.detach().cpu()
    counts, objects, peaks = parse_objects(cmap, paf)
    draw_objects(frame, counts, objects, peaks)

    # Joint-distance features for the classifier.
    joints = preprocessdata.joints_inference(frame, counts, objects, peaks)
    features = preprocessdata.find_distance(joints)

    # The classifier is given the real feature row plus an all-zero row; only the
    # prediction for the real row (index 0) is kept.
    zero_row = [0] * num_parts * num_parts
    predictions = clf.predict([features, zero_row])
    latest = predictions[0]

    # Push the newest prediction and drop the first entry of the queue.
    preprocessdata.prev_queue.append(latest)
    preprocessdata.prev_queue.pop(0)

    preprocessdata.print_label(frame, preprocessdata.prev_queue, gesture_type)
    image_w.value = bgr8_to_jpeg(frame)


In [19]:
# Run the pipeline once on the current camera frame, wrapped in the same
# {'new': frame} dict shape that execute() reads.
execute({'new': camera.value})

In [20]:
# Register execute() as an observer of the camera's `value` attribute.
camera.observe(execute, names='value')

In [21]:
# Remove all observers from the camera, which detaches execute().
camera.unobserve_all()

In [22]:
#camera.running = False