hand pose #84

Open · wants to merge 12 commits into base: master
68 changes: 68 additions & 0 deletions tasks/hand_pose/README.md
@@ -0,0 +1,68 @@
# Hand Pose Estimation And Classification

This project is an extension of TRT Pose for Hand Pose Detection. The project includes:

- Pretrained models for hand pose estimation capable of running in real time on Jetson Xavier NX.

- Scripts for applications of Hand Pose Estimation

- Hand gesture recognition (hand pose classification)

- Cursor control

- Mini-Paint type of application

- Pretrained model for gesture recognition

## Getting Started

### Step 1 - Install trt_pose and its dependencies

Make sure to follow all the instructions from trt_pose and install all of its dependencies.
Follow the installation instructions at https://github.com/NVIDIA-AI-IOT/trt_pose.

### Step 2 - Install dependencies for hand pose

pip install traitlets


### Step 3 - Run hand pose and its applications

A) Hand Pose demo

- Open and follow live_hand_pose.ipynb notebook.

B) Hand gesture recognition (hand pose classification)
- Install dependencies
  - scikit-learn
    - pip install -U scikit-learn
    - or install it from source

The current gesture classification model supports six classes (fist, pan, stop, fine, peace, no hand).
More gestures can be added by creating your own dataset and training an SVM model on it.
An SVM model weight is provided for inference.
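
For reference, inference with the provided weights is just a pickle load followed by a predict call on a vector of pairwise joint distances, mirroring what the demo notebooks below do. In this sketch the file name, the 21-keypoint assumption, and the random feature vector are placeholders:

```python
import pickle
import numpy as np

# Load the provided SVM classifier (file name as used in the demo notebooks).
with open('svmmodel_new.sav', 'rb') as f:
    clf = pickle.load(f)

# Placeholder feature vector: pairwise distances between the 21 hand keypoints.
# In the notebooks this comes from preprocessdata.find_distance(joints).
dist_bn_joints = np.random.rand(21 * 21)

# The predicted class corresponds to one of the six gestures
# (fist, pan, stop, fine, peace, no hand).
gesture = clf.predict([dist_bn_joints])[0]
print(gesture)
```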

To build your own hand gesture classification on top of the hand pose estimation, follow these steps:

- Create your own dataset using the gesture_data_collection.ipynb or gesture_data_collection_with_pose.ipynb notebook.
  This will allow you to define the gestures you want to classify (e.g. thumbs up, fist, etc.).
  The notebook automatically creates a dataset with images and labels that is ready for training the gesture classifier.

- Train using the train_gesture_classification.ipynb notebook. It uses an SVM from scikit-learn;
  other types of models can also be experimented with. A minimal training sketch is shown below.
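
A minimal sketch of that training step, assuming you already have joint-distance feature vectors and gesture labels from the data-collection notebook (the arrays below are random stand-ins, and the feature size assumes 21 hand keypoints):

```python
import pickle
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Stand-in data: 120 samples of 21*21 pairwise joint distances, 6 gesture classes.
# Replace these with the features and labels produced by the data-collection notebook.
X_train = np.random.rand(120, 21 * 21)
y_train = np.random.randint(0, 6, size=120)

# Same pipeline the demo notebooks construct: feature scaling + RBF-kernel SVM.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto', kernel='rbf'))
clf.fit(X_train, y_train)

# Save the trained classifier so the live demos can load it with pickle.
with open('svmmodel.sav', 'wb') as f:
    pickle.dump(clf, f)
```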

C) Cursor control application

- Install dependencies
  - pyautogui
    - python3 -m pip install pyautogui
  - On Jetson, install it from source

- Open and follow the cursor_control_live_demo.ipynb notebook.
- This will allow you to control your mouse cursor on your desktop. It uses the hand gesture classification:
  when your hand gesture is pan, you control the cursor; when it is stop, a left click is performed.
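
The core of the gesture-to-cursor mapping is small. The sketch below is simplified from the control_cursor function in the notebook; the notebook hardcodes a 1000x700 target area, whereas this version scales the 256x256 camera frame to the actual screen size, and it assumes keypoint index 8 is the point the cursor should follow:

```python
import pyautogui

pyautogui.FAILSAFE = False              # the demo moves the cursor everywhere; disable the corner abort
screen_w, screen_h = pyautogui.size()   # target screen resolution

prev_gesture = 'none'

def control_cursor(gesture, joints, frame_size=256):
    """Map a classified gesture and hand keypoints to cursor actions."""
    global prev_gesture
    # Click once on the transition into 'stop', not on every frame it is held.
    if gesture == 'stop' and prev_gesture != 'stop':
        pyautogui.click()
    # While the gesture is 'pan', move the cursor with keypoint 8 of the hand.
    if gesture == 'pan':
        x, y = joints[8]
        pyautogui.moveTo(x * screen_w / frame_size, y * screen_h / frame_size)
    prev_gesture = gesture
```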

D) Mini-Paint

The hand pose model was trained using the training script in trt_pose and hand pose data collected at NVIDIA.

Model details: resnet18
322 changes: 322 additions & 0 deletions tasks/hand_pose/cursor_control_live_demo.ipynb
@@ -0,0 +1,322 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Matplotlib created a temporary config/cache directory at /tmp/matplotlib-kjp96j9b because the default path (/home/mikyas/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.\n"
]
}
],
"source": [
"import json\n",
"import cv2\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.image as mpimg \n",
"import trt_pose.coco\n",
"import math\n",
"import os\n",
"import numpy as np\n",
"import traitlets\n",
"import pickle \n",
"import pyautogui\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<All keys matched successfully>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open('hand_pose.json', 'r') as f:\n",
" hand_pose = json.load(f)\n",
"\n",
"topology = trt_pose.coco.coco_category_to_topology(hand_pose)\n",
"import trt_pose.models\n",
"\n",
"num_parts = len(hand_pose['keypoints'])\n",
"num_links = len(hand_pose['skeleton'])\n",
"\n",
"model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()\n",
"import torch\n",
"\n",
"\n",
"WIDTH = 256\n",
"HEIGHT = 256\n",
"data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()\n",
"\n",
"if not os.path.exists('resnet18_244x224_epoch_4150_trt.pth'):\n",
" MODEL_WEIGHTS = 'resnet18_244x224_epoch_4150.pth'\n",
" model.load_state_dict(torch.load(MODEL_WEIGHTS))\n",
" import torch2trt\n",
" model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25)\n",
" OPTIMIZED_MODEL = 'resnet18_244x224_epoch_4150_trt.pth'\n",
" torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)\n",
"\n",
"\n",
"OPTIMIZED_MODEL = 'resnet18_244x224_epoch_4150_trt.pth'\n",
"from torch2trt import TRTModule\n",
"\n",
"model_trt = TRTModule()\n",
"model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from trt_pose.draw_objects import DrawObjects\n",
"from trt_pose.parse_objects import ParseObjects\n",
"\n",
"parse_objects = ParseObjects(topology,cmap_threshold=0.15, link_threshold=0.15)\n",
"draw_objects = DrawObjects(topology)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"\n",
"import torchvision.transforms as transforms\n",
"import PIL.Image\n",
"\n",
"mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()\n",
"std = torch.Tensor([0.229, 0.224, 0.225]).cuda()\n",
"device = torch.device('cuda')\n",
"\n",
"def preprocess(image):\n",
" global device\n",
" device = torch.device('cuda')\n",
" image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n",
" image = PIL.Image.fromarray(image)\n",
" image = transforms.functional.to_tensor(image).to(device)\n",
" image.sub_(mean[:, None, None]).div_(std[:, None, None])\n",
" return image[None, ...]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.svm import SVC\n",
"clf = make_pipeline(StandardScaler(), SVC(gamma='auto', kernel='rbf'))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from preprocessdata import preprocessdata\n",
"preprocessdata = preprocessdata(topology, num_parts)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"svm_train = False\n",
"if svm_train:\n",
" clf, predicted = preprocessdata.trainsvm(clf, joints_train, joints_test, labels_train, hand.labels_test)\n",
" filename = 'svmmodel.sav'\n",
" pickle.dump(clf, open(filename, 'wb'))\n",
"else:\n",
" filename = 'svmmodel_new.sav'\n",
" clf = pickle.load(open(filename, 'rb'))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from jetcam.usb_camera import USBCamera\n",
"from jetcam.csi_camera import CSICamera\n",
"from jetcam.utils import bgr8_to_jpeg\n",
"\n",
"camera = USBCamera(width=WIDTH, height=HEIGHT, capture_fps=30, capture_device=1)\n",
"#camera = CSICamera(width=WIDTH, height=HEIGHT, capture_fps=30)\n",
"\n",
"camera.running = True"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f735c3a4f55842d3bde40004c969478b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Image(value=b'', format='jpeg', height='256', width='256')"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import ipywidgets\n",
"from IPython.display import display\n",
"\n",
"\n",
"image_w = ipywidgets.Image(format='jpeg', width=256, height=256)\n",
"display(image_w)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"screenWidth, screenHeight = pyautogui.size()\n",
"p_text = 'none'\n",
"pyautogui.FAILSAFE = False\n",
"def control_cursor(text, joints):\n",
" global p_text\n",
" if p_text!=\"stop\" and text==\"stop\":\n",
" pyautogui.click()\n",
" if text == \"pan\":\n",
" pyautogui.moveTo(((joints[8][0])*1000)/256, ((joints[8][1])*700)/256)\n",
" #pyautogui.moveTo((joints[8][0]), (joints[8][1]))\n",
" p_text = text"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def execute(change):\n",
" image = change['new']\n",
" data = preprocess(image)\n",
" cmap, paf = model_trt(data)\n",
" cmap, paf = cmap.detach().cpu(), paf.detach().cpu()\n",
" counts, objects, peaks = parse_objects(cmap, paf)#, cmap_threshold=0.15, link_threshold=0.15)\n",
" draw_objects(image, counts, objects, peaks)\n",
" joints = preprocessdata.joints_inference(image, counts, objects, peaks)\n",
" dist_bn_joints = preprocessdata.find_distance(joints)\n",
" gesture = clf.predict([dist_bn_joints,[0]*num_parts*num_parts])\n",
" gesture_joints = gesture[0]\n",
" preprocessdata.prev_queue.append(gesture_joints)\n",
" preprocessdata.prev_queue.pop(0)\n",
" preprocessdata.print_label(image, preprocessdata.prev_queue)\n",
" control_cursor(preprocessdata.text, joints)\n",
" image_w.value = bgr8_to_jpeg(image)\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"execute({'new': camera.value})"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"camera.observe(execute, names='value')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#camera.unobserve_all()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#camera.running = False"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}