# PoseNet Example

#### **Reference**: https://github.com/rwightman/posenet-pytorch

# 0. What is PoseNet?

#### **Reference**: https://www.tensorflow.org/lite/examples/pose_estimation/overview

![posenet process](https://www.tensorflow.org/images/lite/models/pose_estimation.gif)

# 1. Set path variables

In [None]:
# Capture the shell's working directory; the `!` capture yields a list of output lines.
pwd = !pwd # ['/kaggle/working']
# Keep the first line and append a trailing slash so later cells can build
# paths by plain string concatenation.
pwd = pwd[0] + '/'
pwd

In [None]:
# Path of the posenet-pytorch directory inside the working directory
# (trailing slash kept so file names can be appended directly).
posenet_path = f'{pwd}posenet-pytorch/'
posenet_path

# 2. Clone PoseNet repository

In [None]:
# Start from a clean slate: delete everything under the working directory.
!rm -rf {pwd}/*
# Clone the PoseNet PyTorch repository; stdout and stderr are both discarded,
# so a failed clone only shows up in the listing below.
!git clone https://github.com/rwightman/posenet-pytorch > /dev/null 2>&1
# List the repository directory.
!ls {posenet_path}

# 3. Download sample images

In [None]:
# Run the repository's get_test_images.py with all of its output discarded
# (presumably it fetches the sample images into ./images -- confirm in the repo).
!python3 {posenet_path}get_test_images.py > /dev/null 2>&1
# List the images directory under the working directory.
!ls {pwd}/images

In [None]:
import os

# Directory holding the sample images.
image_path = pwd + 'images/'
# Alphabetically sorted names of the regular files in that directory
# (sub-directories are skipped).
image_list = sorted(
    name
    for name in os.listdir(image_path)
    if os.path.isfile(os.path.join(image_path, name))
)
image_list

## Choose samples

In [None]:
import random

# Number of images to sample and display. Clamped to the number of available
# images so random.sample() cannot raise ValueError on a short list.
sample_count = min(5, len(image_list))
# Sample with sample_count (not a second hard-coded literal) so the sample
# size always matches the row count used by the plotting cells.
image_samples = random.sample(image_list, sample_count)
image_samples

## Show samples

In [None]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

# One sampled image per row, single column.
row, col = sample_count, 1
figure = plt.figure(figsize=(10 * sample_count, 50 * col))

# Walk the grid slot by slot; slot numbers are 1-based for add_subplot.
for slot in range(row * col):
    r = slot // col
    sample_name = image_samples[r]
    ax = figure.add_subplot(row, col, slot + 1)
    ax.axis("off")
    ax.imshow(mpimg.imread(image_path + sample_name))
    # Title is the file name up to its first dot.
    ax.set_title(sample_name.split('.')[0], fontsize=20)
plt.show()

# 4. Run PoseNet for image

## **GPU is required**

In [None]:
# Directory that receives the images written by the image demo.
output_path = pwd + 'output/'
# Recreate the directory from scratch so files from an earlier run are not kept.
if os.path.isdir(output_path):
    !rm -rf {output_path}
!mkdir {output_path}

# Run the repository's image demo with model 101, reading from image_path and
# writing to output_path; stdout and stderr are both discarded.
!python3 {posenet_path}image_demo.py --model 101 --notxt --image_dir {image_path} --output_dir {output_path} > /dev/null 2>&1

# List what the demo produced.
!ls {output_path}

# 5. Compare images

In [None]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

# One row per sampled image, two columns: original on the left, PoseNet output on the right.
row, col = sample_count, 2
figure = plt.figure(figsize=(10 * sample_count, 50 * col))

# (title prefix, directory to load the image from) for each column, left to right.
panels = (
    ('(Before) ', image_path),   # original image
    ('(After) ', output_path),   # PoseNet image, stored under the same file name
)

for r in range(row):
    sample_name = image_samples[r]
    for c, (label, directory) in enumerate(panels):
        ax = figure.add_subplot(row, col, r * col + c + 1)
        ax.axis("off")
        ax.imshow(mpimg.imread(directory + sample_name))
        ax.set_title(label + sample_name.split('.')[0], fontsize=20)
plt.show()

# 6. Download a video

## Display original video

In [None]:
# File name of the input video; the following cells read it from /kaggle/input/school2/.
original_video_name = 'schoolTrim.mp4'


In [None]:
# Confirm the input video is present before probing and playing it.
os.path.exists(f'/kaggle/input/school2/{original_video_name}')

In [None]:
# Ask ffprobe for the first video stream's width and height, printed as "<width>x<height>".
size = !ffprobe -v error -select_streams v:0 -show_entries stream=width,height -of csv=s=x:p=0 {'/kaggle/input/school2/'+original_video_name}
# The capture is a list of output lines; the dimensions are on the first one.
size = size[0]
video_width, video_height = (int(dimension) for dimension in size.split('x'))
video_width, video_height

In [None]:
from IPython.display import HTML
from base64 import b64encode

# Embed the input video in the notebook as a base64 data URI.
# The file is read inside a `with` block so the handle is closed promptly.
with open('/kaggle/input/school2/' + original_video_name, 'rb') as video_file:
    src = 'data:video/mp4;base64,' + b64encode(video_file.read()).decode()
# Show the player at half the video's resolution. `loop` is the HTML attribute
# that restarts playback at the end (the previous `loof` was silently ignored).
HTML('<video width="%d" height="%d" controls autoplay loop><source src="%s" type="video/mp4"></video>' % (video_width // 2, video_height // 2, src))

# 7. Run PoseNet for video

## Chrome supports the VP9 (`vp90`) codec.

**Reference** : https://github.com/jupyter-widgets/ipywidgets/issues/2559#issuecomment-536536553

In [None]:
%%writefile {posenet_path}video_demo.py

import torch
import cv2
import time
import argparse
from tqdm.auto import tqdm

import posenet

def _video_source(value):
    """Convert a ``--cam_id`` command-line value for ``cv2.VideoCapture``.

    A value consisting only of decimal digits is returned as an ``int`` so it
    selects a camera by index; any other value is returned unchanged and is
    used as a video file path. (A file whose name is purely numeric must
    therefore be given with a directory prefix, e.g. ``./0``.)
    """
    return int(value) if value.isdecimal() else value


# Command-line options of the video demo. They are parsed at module level
# because main() reads the resulting `args` global.
parser = argparse.ArgumentParser()
parser.add_argument('--model', type=int, default=101,
                    help='model id passed to posenet.load_model')
parser.add_argument('--cam_id', type=_video_source, default=0,
                    help='camera index or path of the video file to process')
parser.add_argument('--cam_width', type=int, default=1920,
                    help='frame width requested from the capture and used for the output video')
parser.add_argument('--cam_height', type=int, default=1080,
                    help='frame height requested from the capture and used for the output video')
parser.add_argument('--scale_factor', type=float, default=0.7125,
                    help='scale factor passed to posenet.read_cap')
parser.add_argument('--output', type=str, default='output.mp4',
                    help='path of the video file to write')
parser.add_argument('--codec', type=str, default='vp90',
                    help='four-character FourCC code of the output codec')
parser.add_argument('--fps', type=float, default=25.0,
                    help='frame rate of the output video')
args = parser.parse_args()


def main():
    """Run PoseNet on each frame of the input video and write the overlaid result.

    Reads its configuration from the module-level ``args``: loads the model,
    opens ``args.cam_id`` as the video source, and for every frame decodes up
    to 10 poses, draws them with ``posenet.draw_skel_and_kp`` and appends the
    frame to ``args.output``. Prints the average processing rate at the end.

    Raises:
        IOError: if the video source cannot be opened.
    """
    model = posenet.load_model(args.model)
    model = model.cuda()
    output_stride = model.output_stride

    cap = cv2.VideoCapture(args.cam_id)
    out = None
    frame_count = 0
    start = time.time()
    try:
        # Fail loudly instead of silently writing an empty video.
        if not cap.isOpened():
            raise IOError('Cannot open video source: %s' % (args.cam_id,))
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.cam_width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.cam_height)

        out = cv2.VideoWriter(
            args.output, cv2.VideoWriter_fourcc(*args.codec), args.fps,
            (args.cam_width, args.cam_height))

        # The frame count reported by the capture bounds the loop (and the progress bar).
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        for _ in tqdm(range(total_frames), position=0):
            try:
                input_image, display_image, output_scale = posenet.read_cap(
                    cap, scale_factor=args.scale_factor, output_stride=output_stride)
            except IOError:
                # An IOError from read_cap is treated as the end of the input.
                break

            # Inference only: no gradients are needed.
            with torch.no_grad():
                input_image = torch.Tensor(input_image).cuda()

                heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = model(input_image)

                pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses(
                    heatmaps_result.squeeze(0),
                    offsets_result.squeeze(0),
                    displacement_fwd_result.squeeze(0),
                    displacement_bwd_result.squeeze(0),
                    output_stride=output_stride,
                    max_pose_detections=10,
                    min_pose_score=0.15)

            # NOTE(review): presumably maps the coordinates from the scaled
            # model input back onto display_image -- confirm in posenet.read_cap.
            keypoint_coords *= output_scale

            overlay_image = posenet.draw_skel_and_kp(
                display_image, pose_scores, keypoint_scores, keypoint_coords,
                min_pose_score=0.15, min_part_score=0.1)

            frame_count += 1
            out.write(overlay_image)
    finally:
        # Always release the capture and the writer, even when a frame fails,
        # so the output file is finalized and the handles are not leaked.
        cap.release()
        if out is not None:
            out.release()

    elapsed = time.time() - start
    # Guard the division in case the clock did not advance.
    print('Average FPS: ', frame_count / elapsed if elapsed > 0 else 0.0)
    

# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

In [None]:
# Print ffmpeg's report for the input video; no output file is given, so nothing is converted.
!ffmpeg -i /kaggle/input/school2/schoolTrim.mp4


In [None]:
!ffmpeg -i {original_video_name}

In [None]:
# File name of the PoseNet result video, written into the working directory.
posenet_video_name = 'output.mp4'
# FourCC codec and frame rate forwarded to video_demo.py.
codec = 'vp90'
fps = 25.0
# Run the video demo on the input video using the dimensions probed earlier;
# only stdout is discarded, so errors still show up in the cell output.
!python3 {posenet_path}video_demo.py --model 101 --cam_width {video_width} --cam_height {video_height} \
                                    --cam_id {'/kaggle/input/school2/'+original_video_name} --output {pwd}{posenet_video_name} \
                                    --codec {codec} --fps {fps} > /dev/null
# List the working directory.
!ls {pwd}

In [None]:
# Print ffmpeg's report for the PoseNet result video. The relative name is resolved
# against the current working directory, which is where the video was written
# as long as the directory has not changed since `pwd` was captured.
!ffmpeg -i {posenet_video_name}

# 8. Display PoseNet video

In [None]:
# Embed the PoseNet result video in the notebook as a base64 data URI.
# The file is read inside a `with` block so the handle is closed promptly.
with open(pwd + posenet_video_name, 'rb') as video_file:
    src = 'data:video/mp4;base64,' + b64encode(video_file.read()).decode()
# Show the player at half the video's resolution. `loop` is the HTML attribute
# that restarts playback at the end (the previous `loof` was silently ignored).
HTML('<video width="%d" height="%d" controls autoplay loop><source src="%s" type="video/mp4"></video>' % (video_width // 2, video_height // 2, src))