Run the cell below to install the dependencies, then restart the runtime (kernel) when it tells you to.

In [None]:
!pip install openmim
!mim install mmengine "mmcv>=2.0.1" "mmdet>=3.1.0" "mmpose>=1.1.0"
!git clone https://github.com/open-mmlab/mmpose.git

Collecting openmim
  Downloading openmim-0.3.9-py2.py3-none-any.whl (52 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/52.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.7/52.7 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Collecting colorama (from openmim)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Collecting model-index (from openmim)
  Downloading model_index-0.1.11-py3-none-any.whl (34 kB)
Collecting opendatalab (from openmim)
  Downloading opendatalab-0.0.10-py3-none-any.whl (29 kB)
Collecting ordered-set (from model-index->openmim)
  Downloading ordered_set-4.1.0-py3-none-any.whl (7.6 kB)
Collecting pycryptodome (from opendatalab->openmim)
  Downloading pycryptodome-3.19.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0m
Collecting openxlab (from

Looking in links: https://download.openmmlab.com/mmcv/dist/cu118/torch2.1.0/index.html
Collecting mmengine
  Downloading mmengine-0.10.1-py3-none-any.whl (450 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m450.3/450.3 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting mmcv>=2.0.1
  Downloading https://download.openmmlab.com/mmcv/dist/cu118/torch2.1.0/mmcv-2.1.0-cp310-cp310-manylinux1_x86_64.whl (99.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.3/99.3 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting mmdet>=3.1.0
  Downloading mmdet-3.2.0-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m45.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting mmpose>=1.1.0
  Downloading mmpose-1.2.0-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m60.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting addict (from m

Change the video path passed to `process_hand(...)` on the last line of the cell below to the file name of your own video.

In [None]:
import logging
import mimetypes
import os
import time
from argparse import ArgumentParser

import cv2
import json_tricks as json
import mmcv
import mmengine
import numpy as np
from mmengine.logging import print_log

from mmpose.apis import inference_topdown
from mmpose.apis import init_model as init_pose_estimator
from mmpose.evaluation.functional import nms
from mmpose.registry import VISUALIZERS
from mmpose.structures import merge_data_samples, split_instances
from mmpose.utils import adapt_mmdet_pipeline

from mmdet.apis import inference_detector, init_detector

def count_digits(kpt: "np.ndarray | list") -> int:
    """Count the raised fingers (thumb excluded) of a single hand.

    Strategy: a finger counts as raised when its tip is higher than the
    mean of the four knuckles. "Higher" is evaluated as a strictly smaller
    value in column 1 (the y coordinate).

    Args:
        kpt: Keypoints of one hand, array-like with at least 21 rows and at
            least 2 columns. Rows 8, 12, 16 and 20 are used as finger tips
            and rows 5, 9, 13 and 17 as knuckles; row 4 (thumb) is ignored.
            Nested lists are accepted and converted to an array.

    Returns:
        Number of finger tips (0-4) lying above the mean knuckle height.
    """
    # Fancy indexing below needs an ndarray; a plain list would raise.
    points = np.asarray(kpt)
    tip_heights = points[[8, 12, 16, 20], 1]  # 4 is thumb, we ignore it
    mean_knuckle_height = np.mean(points[[17, 13, 9, 5]], axis=0)[1]
    return int(np.count_nonzero(tip_heights < mean_knuckle_height))


def process_one_image(img,
                      detector,
                      pose_estimator,
                      visualizer=None,
                      det_cat_id=0,
                      bbox_thr=0.3,
                      nms_thr=0.3,
                      kpt_thr=0.1):
    """Detect boxes in one image, estimate keypoints and optionally draw them.

    Args:
        img: Image file path (str) or image array. Arrays are converted with
            ``mmcv.bgr2rgb`` before drawing, so they are expected to be BGR.
        detector: Detection model handed to ``inference_detector``.
        pose_estimator: Pose model handed to ``inference_topdown``.
        visualizer: Optional visualizer. When given, the result is drawn via
            ``add_datasample`` (retrieve it with ``visualizer.get_image()``).
            When ``None``, drawing is skipped.
        det_cat_id: Label value of the detections to keep.
        bbox_thr: Detections must score strictly above this value.
        nms_thr: Threshold passed to ``nms`` for the remaining boxes.
        kpt_thr: Keypoint threshold forwarded to the visualizer.

    Returns:
        The ``pred_instances`` entry of the merged pose results, or ``None``
        if that entry is absent.
    """
    # predict bbox
    det_result = inference_detector(detector, img)
    pred_instance = det_result.pred_instances.cpu().numpy()
    keep = np.logical_and(pred_instance.labels == det_cat_id,
                          pred_instance.scores > bbox_thr)
    # append the score as a 5th column because nms() consumes it
    scored_bboxes = np.concatenate(
        (pred_instance.bboxes, pred_instance.scores[:, None]), axis=1)[keep]
    bboxes = scored_bboxes[nms(scored_bboxes, nms_thr), :4]

    # predict keypoints
    pose_results = inference_topdown(pose_estimator, img, bboxes)
    data_samples = merge_data_samples(pose_results)

    # draw the results; skipped without a visualizer (the default) instead of
    # failing on ``None.add_datasample``
    if visualizer is not None:
        if isinstance(img, str):
            img = mmcv.imread(img, channel_order='rgb')
        elif isinstance(img, np.ndarray):
            img = mmcv.bgr2rgb(img)

        visualizer.add_datasample('result',
                                  img,
                                  data_sample=data_samples,
                                  draw_gt=False,
                                  draw_heatmap=False,
                                  # was the argparse leftover 'store_true',
                                  # which only worked by being truthy
                                  draw_bbox=True,
                                  show_kpt_idx=True,
                                  skeleton_style='mmpose',
                                  show=False,
                                  wait_time=0.001,
                                  kpt_thr=kpt_thr)

    # if there is no instance detected, return None
    return data_samples.get('pred_instances', None)


def process_hand(video_path, output_root='vis_results', device='cpu'):
    """Run hand detection + pose estimation on a video and count raised fingers.

    For every frame the number of raised fingers per detected hand is
    printed, the visualised frame is appended to an output video, and the
    predictions of all frames are finally written to a JSON file.

    Args:
        video_path: Path of the input video.
        output_root: Directory for the outputs (created if missing). The
            video is saved under its original file name and the predictions
            as ``results_<video name without extension>.json``.
        device: Device string passed to both model initialisers,
            e.g. ``'cpu'`` or ``'cuda:0'`` if you have cuda.

    Raises:
        IOError: If the video cannot be opened.
    """
    # model configs (relative to the cloned mmpose repo) and checkpoints
    det_config = 'mmpose/demo/mmdetection_cfg/rtmdet_nano_320-8xb32_hand.py'
    det_checkpoint = 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmdet_nano_8xb32-300e_hand-267f9c8f.pth'
    pose_config = 'mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py'
    pose_checkpoint = 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth'

    # prepare output locations
    mmengine.mkdir_or_exist(output_root)
    video_name = os.path.basename(video_path)
    output_file = os.path.join(output_root, video_name)
    pred_save_path = f"{output_root}/results_{os.path.splitext(video_name)[0]}.json"

    # guess_type() yields None for unknown extensions; fall back to 'video'
    mime_type = mimetypes.guess_type(video_path)[0]
    input_type = mime_type.split('/')[0] if mime_type else 'video'

    # build detector
    detector = init_detector(det_config, det_checkpoint, device=device)
    detector.cfg = adapt_mmdet_pipeline(detector.cfg)

    # build pose estimator
    pose_estimator = init_pose_estimator(
        pose_config,
        pose_checkpoint,
        device=device,
        cfg_options=dict(model=dict(test_cfg=dict(output_heatmaps=False))))

    # build visualizer
    pose_estimator.cfg.visualizer.radius = 3
    pose_estimator.cfg.visualizer.alpha = 0.8
    pose_estimator.cfg.visualizer.line_width = 1
    visualizer = VISUALIZERS.build(pose_estimator.cfg.visualizer)
    # the dataset_meta is loaded from the checkpoint and
    # then pass to the model in init_pose_estimator
    visualizer.set_dataset_meta(
        pose_estimator.dataset_meta, skeleton_style='mmpose')

    # initialize video reader; the writer is created lazily on the first frame
    cap = cv2.VideoCapture(video_path)
    video_writer = None
    pred_instances_list = []
    frame_idx = 0

    try:
        if not cap.isOpened():
            raise IOError(f'could not open video: {video_path}')

        # keep the source frame rate; fall back to 25 when it is unknown
        # (reported as 0, negative or NaN)
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 25

        while cap.isOpened():
            success, frame = cap.read()
            if not success:  # end of video
                break
            frame_idx += 1

            # topdown pose estimation
            pred_instances = process_one_image(frame, detector,
                                               pose_estimator, visualizer)

            # count fingers up from keypoints, one entry per detected hand;
            # pred_instances is None when nothing was predicted
            hands = [] if pred_instances is None else pred_instances.keypoints
            digits = [count_digits(np.asarray(hand)) for hand in hands]
            print(f'frame_{frame_idx}, digits: {digits}')

            # save predictions
            pred_instances_list.append(
                dict(frame_id=frame_idx,
                     instances=split_instances(pred_instances)))

            # output video
            frame_vis = visualizer.get_image()
            if video_writer is None:  # first frame: initiate video_writer
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                video_writer = cv2.VideoWriter(
                    output_file, fourcc, fps,
                    (frame_vis.shape[1], frame_vis.shape[0]))

            video_writer.write(mmcv.rgb2bgr(frame_vis))
    finally:
        # release handles even if inference fails half-way through
        cap.release()
        video_written = video_writer is not None
        if video_written:
            video_writer.release()

    with open(pred_save_path, 'w') as f:
        json.dump(
            dict(
                meta_info=pose_estimator.dataset_meta,
                instance_info=pred_instances_list), f, indent='\t')
    print(f"predictions have been saved at {pred_save_path}")

    # only announce the video if at least one frame was actually written
    if video_written:
        print_log(
            f'the output {input_type} has been saved at {output_file}',
            logger='current',
            level=logging.INFO)

# Name of the video to process - change this to whatever you named your video.
VIDEO_PATH = 'counting_test.mp4'
process_hand(VIDEO_PATH)

Loads checkpoint by http backend from path: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmdet_nano_8xb32-300e_hand-267f9c8f.pth


Downloading: "https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmdet_nano_8xb32-300e_hand-267f9c8f.pth" to /root/.cache/torch/hub/checkpoints/rtmdet_nano_8xb32-300e_hand-267f9c8f.pth


Loads checkpoint by http backend from path: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth


Downloading: "https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth" to /root/.cache/torch/hub/checkpoints/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


frame_1, digits: [2]
frame_2, digits: [3]
frame_3, digits: [3]
frame_4, digits: [3]
frame_5, digits: [3]
frame_6, digits: [3]
frame_7, digits: [4]
frame_8, digits: [4]
frame_9, digits: [4]
frame_10, digits: [4]
frame_11, digits: [4]
frame_12, digits: [4]
frame_13, digits: [4]
frame_14, digits: [4]
frame_15, digits: [4]
frame_16, digits: [4]
frame_17, digits: [4]
frame_18, digits: [4]
frame_19, digits: [4]
frame_20, digits: [4]
frame_21, digits: [4]
frame_22, digits: [4]
frame_23, digits: [4]
frame_24, digits: [4]
frame_25, digits: [4]
frame_26, digits: [4]
frame_27, digits: [3]
frame_28, digits: [3]
frame_29, digits: [2]
frame_30, digits: [2]
frame_31, digits: [4]
frame_32, digits: [1]
frame_33, digits: [2]
frame_34, digits: [4]
frame_35, digits: [2]
frame_36, digits: [2]
frame_37, digits: [2]
frame_38, digits: [2]
frame_39, digits: [2]
frame_40, digits: [2]
frame_41, digits: [2]
frame_42, digits: [2]
frame_43, digits: [2]
frame_44, digits: [2]
frame_45, digits: [2]
frame_46, digits: [

