# MQTT client

In [1]:
import paho.mqtt.client as mqtt

client = mqtt.Client()

client.connect('192.168.0.173', 1883, 60)

0

# Import pose estimation model

## Define output format

Let's load the JSON file which describes the human pose task.  This is in COCO format, it is the category descriptor pulled from the annotations file.  We modify the COCO category slightly, to add a neck keypoint.  We will use this task description JSON to create a topology tensor, which is an intermediate data structure that describes the part linkages, as well as which channels in the part affinity field each linkage corresponds to.

In [2]:
import os
os.environ['MPLCONFIGDIR'] = os.getcwd() + "/configs/" # Specify MatplotLib config folder

import json
import numpy as np
# Requiere https://github.com/NVIDIA-AI-IOT/trt_pose
import trt_pose.coco
from trt_pose.draw_objects import DrawObjects
from trt_pose.parse_objects import ParseObjects

with open('human_pose.json', 'r') as f:
    human_pose = json.load(f)

topology = trt_pose.coco.coco_category_to_topology(human_pose)

parse_objects = ParseObjects(topology)
draw_objects = DrawObjects(topology)

## Import TensorRT optimized model

Next, we'll load our model. It has been optimized using another Notebook and saved so that we do not need to perform optimization again, we can just load the model. Please note that TensorRT has device specific optimizations, so you can only use an optimized model on similar platforms.

In [3]:
import torch
# Requiere https://github.com/NVIDIA-AI-IOT/torch2trt
from torch2trt import TRTModule

OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'

model_trt = TRTModule()
model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

<All keys matched successfully>

# Define video-processing pipeline

## Pre-process image for TRT_Pose

Next, let's define a function that will preprocess the image, which is originally in BGR8 / HWC format. It is formated to the default Torch format.

In [4]:
import cv2
import torchvision.transforms as transforms
import PIL.Image

mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
device = torch.device('cuda')

def preprocess(image):
    global device
    device = torch.device('cuda')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = PIL.Image.fromarray(image)
    image = transforms.functional.to_tensor(image).to(device)
    image.sub_(mean[:, None, None]).div_(std[:, None, None])
    return image[None, ...]

## Access video feed

Access images streamed by a WiFi camera on the local network.

In [5]:
WIDTH, HEIGHT = 224, 224 # Imposed by the model

import ipywidgets
from IPython.display import display
import urllib.request

image_w = ipywidgets.Image(format='jpeg')

display(image_w)

url_esp32 = 'http://192.168.0.162/capture'
def fetch_image(url):
    imgResp = urllib.request.urlopen(url)
    imgNp = np.array(bytearray(imgResp.read()),dtype=np.uint8)
    img = cv2.imdecode(imgNp,-1)
    return cv2.resize(img, (WIDTH, HEIGHT), interpolation = cv2.INTER_AREA)

Image(value=b'', format='jpeg')

## Get keypoints from the peaks 

In [6]:
def get_keypoint(objects, index, peaks):
    kpoint = []
    human = objects[0][index]
    C = human.shape[0]
    for j in range(C):
        k = int(human[j])
        if k >= 0:
            peak = peaks[0][j][k]   # peak[1]:width, peak[0]:height
            peak = (j, float(peak[0]), float(peak[1]))
            kpoint.append(peak)
            #print('index:%d : success [%5.3f, %5.3f]'%(j, peak[1], peak[2]) )
        else:
            peak = (j, None, None)
            kpoint.append(peak)
            #print('index:%d : None'%(j) )
    return kpoint

## Detect if one hand is raised


In [12]:
def isRaisingHand(poseKeypoints):        
        rightHand_idx, rightHand_y, rightHand_x = poseKeypoints[9]
        leftHand_idx, leftHand_y, leftHand_x = poseKeypoints[10]
        rightShoulder_idx, rightShoulder_y, rightShoulder_x = poseKeypoints[6]
        leftShoulder_idx, leftShoulder_y, leftShoulder_x = poseKeypoints[5]
        
        if rightShoulder_y and leftShoulder_y and leftHand_y:
            print(rightShoulder_y, leftShoulder_y, leftHand_y)
            shoulderSlope = (rightShoulder_y - leftShoulder_y) / (
                rightShoulder_x - leftShoulder_x
            )
            shoulderOri = rightShoulder_y - shoulderSlope * rightShoulder_x
            raisingLeft = leftHand_y < (shoulderSlope * leftHand_x + shoulderOri)
            return raisingLeft
        else:
            return False

## Processing loop

The *execute()* function contains the whole analysis process: 
- Read image
- Pre-process to Torch format
- Infere key-points
- Draw skeleton on the input image
- Update in output window.

In [13]:
try:
    while True:
        image = fetch_image(url_esp32)
        data = preprocess(image)
        cmap, paf = model_trt(data)
        cmap, paf = cmap.detach().cpu(), paf.detach().cpu()
        counts, objects, peaks = parse_objects(cmap, paf) #, cmap_threshold=0.15, link_threshold=0.15)
        for i in range(counts[0]):
            keypoints = get_keypoint(objects, i, peaks)
            if isRaisingHand(keypoints):
                client.publish("lamp", "2")
                print('Raised')
        draw_objects(image, counts, objects, peaks)
        image_w.value = bytes(cv2.imencode('.jpg', image[:, ::-1, :])[1])
except KeyboardInterrupt:
    print('Video processing stopped')

0.6638293862342834 0.6686788201332092 0.7234136462211609
0.5800638198852539 0.6216279864311218 0.44287824630737305
Raised
0.5755125880241394 0.5949109196662903 0.46541109681129456
Raised
0.5491074919700623 0.6145654320716858 0.5089828372001648
0.5462464094161987 0.6149406433105469 0.5093923807144165
0.5255587697029114 0.5854342579841614 0.5381296873092651
0.5270360112190247 0.5270677208900452 0.8666273355484009
0.5339232683181763 0.5439188480377197 0.9788433313369751
0.5334664583206177 0.5327016711235046 0.9769620895385742
0.5240234136581421 0.5787531733512878 0.43668097257614136
Raised
0.5262635946273804 0.5842646360397339 0.4211902320384979
Raised
0.5251557230949402 0.5785633325576782 0.629741370677948
0.5300996899604797 0.5762194395065308 0.5083531141281128
0.5550853610038757 0.59831303358078 0.7438503503799438
Video processing stopped
