<a href="https://colab.research.google.com/github/NbtKmy/gc_workshops/blob/main/HumanPoseEstimation_OpenCV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Human Pose Estimation mit OpenCV


Quelle: https://github.com/spmallick/learnopencv/tree/master (dort im Verzeichnis "OpenPose")

https://learnopencv.com/deep-learning-based-human-pose-estimation-using-opencv-cpp-python/

In [None]:
from google.colab import drive
drive.mount("/content/drive/")

!rm -rf "/content/pose"
%cd "content"
!mkdir -p "/pose/mpi"
!wget https://raw.githubusercontent.com/spmallick/learnopencv/master/OpenPose/pose/mpi/pose_deploy_linevec.prototxt -P /content/pose/mpi
!wget https://raw.githubusercontent.com/spmallick/learnopencv/master/OpenPose/pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt -P /content/pose/mpi
# Nur MPI model verwenden
!wget -c https://www.dropbox.com/s/drumc6dzllfed16/pose_iter_160000.caffemodel -P /content/pose/mpi

In [None]:
!pip install opencv-python numpy

## Pose Estimation für Bild

## Pose Estimation für Video


In [None]:
import cv2
import time
import numpy as np

from google.colab import files

# Network definition and weights downloaded by the setup cell.
protoFile = "pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt"
weightsFile = "pose/mpi/pose_iter_160000.caffemodel"
# Number of confidence maps (one per keypoint) read from the network output.
nPoints = 15
# Pairs of keypoint indices that are connected by a line when drawing the skeleton.
POSE_PAIRS = [[0,1], [1,2], [2,3], [3,4], [1,5], [5,6], [6,7], [1,14], [14,8], [8,9], [9,10], [14,11], [11,12], [12,13]]

# Spatial size of the blob fed into the network.
inWidth = 368
inHeight = 368
# Minimum probability for a keypoint to be accepted.
threshold = 0.1

# Upload a video from the local machine; the first uploaded file is used.
uploaded = files.upload()
if not uploaded:
    raise RuntimeError("No video file was uploaded.")
input_source = next(iter(uploaded))

cap = cv2.VideoCapture(input_source)
if not cap.isOpened():
    raise IOError("Could not open video: {}".format(input_source))

# Read one frame up front: the following cell takes the output size from `frame`.
hasFrame, frame = cap.read()
if not hasFrame:
    raise IOError("Could not read a frame from: {}".format(input_source))

# Rewind so that the processing loop starts at the first frame again instead
# of silently dropping it.
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)



## Prozessor CPU oder GPU

Mit CPU dauert die Analyse etwa 3-4 Sekunden pro Frame. Mit GPU 1.5 bis 2 Sekunden.

Weil das Beispielvideo 30 Frames pro Sekunde hat, dauert die Analyse einer Videosequenz von 1 Sekunde auf der CPU ca. 105 Sekunden (30 Frames × ca. 3,5 Sekunden).
Ist das Testvideo 10 Sekunden lang, dauert die Analyse insgesamt ca. 18 Minuten.


In [None]:
# Output video: MJPG-encoded AVI at 10 frames per second, same frame size as
# the probe frame read from the input.
vid_writer = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame.shape[1], frame.shape[0]))

net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)

# Use CPU.
# The backend and the target are two separate settings: setPreferableBackend
# expects a DNN_BACKEND_* constant, setPreferableTarget a DNN_TARGET_* constant.
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# Use GPU
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)


In [None]:
# Run the pose network on every frame of `cap`, draw the detected skeleton on
# the frame and append it to `vid_writer`.
try:
    while True:
        t = time.time()
        hasFrame, frame = cap.read()
        # Stop before touching `frame`: it is None once the stream has ended.
        if not hasFrame:
            break

        frameHeight, frameWidth = frame.shape[:2]

        inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
                                        (0, 0, 0), swapRB=False, crop=False)
        net.setInput(inpBlob)
        output = net.forward()

        # Height and width of the confidence maps produced by the network.
        H = output.shape[2]
        W = output.shape[3]

        # One entry per keypoint: (x, y) in frame coordinates, or None when the
        # probability does not exceed the threshold.
        points = []
        for i in range(nPoints):
            # Confidence map of the corresponding body part.
            probMap = output[0, i, :, :]

            # Global maximum of the confidence map and its location.
            minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)

            # Scale the location from map coordinates to the original frame.
            x = (frameWidth * point[0]) / W
            y = (frameHeight * point[1]) / H

            points.append((int(x), int(y)) if prob > threshold else None)

        # Draw the skeleton: a line plus both end points for every pair whose
        # two keypoints were detected.
        for partA, partB in POSE_PAIRS:
            if points[partA] is not None and points[partB] is not None:
                cv2.line(frame, points[partA], points[partB], (0, 255, 255), 3, lineType=cv2.LINE_AA)
                cv2.circle(frame, points[partA], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
                cv2.circle(frame, points[partB], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)

        cv2.putText(frame, "time taken = {:.2f} sec".format(time.time() - t), (50, 50), cv2.FONT_HERSHEY_COMPLEX, .8, (255, 50, 0), 2, lineType=cv2.LINE_AA)

        vid_writer.write(frame)
finally:
    # Release both handles even when the loop is interrupted, so that the
    # output file is closed properly.
    cap.release()
    vid_writer.release()

## Video Analyse ...

In [None]:
from IPython.display import YouTubeVideo

# ID of the YouTube video; the download cell appends it to
# "https://www.youtube.com/watch?v=" to build the full link.
YOUTUBE_ID = 'zGVfSRfybnI'


# Last expression of the cell: displayed as the cell output.
YouTubeVideo(YOUTUBE_ID)

In [None]:
!pip install yt-dlp

In [None]:
from yt_dlp import YoutubeDL

!rm youtube.mp4

option = {
        "outtmpl" : "youtube.%(ext)s",
        "format" : "best[ext=mp4]"
    }

ydl = YoutubeDL(option)
yt_link = "https://www.youtube.com/watch?v=" + YOUTUBE_ID
result = ydl.download([yt_link])

In [None]:

# Ausschnitt 5 Sekunden aus dem Video nehmen
!ffmpeg -y -loglevel info -i youtube.mp4 -ss 00:00:37.0 -t 5 video.mp4

import cv2
import time
import numpy as np 

protoFile = "pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt"
weightsFile = "pose/mpi/pose_iter_160000.caffemodel"
nPoints = 15
POSE_PAIRS = [[0,1], [1,2], [2,3], [3,4], [1,5], [5,6], [6,7], [1,14], [14,8], [8,9], [9,10], [14,11], [11,12], [12,13]]

inWidth = 368
inHeight = 368
threshold = 0.1

cap = cv2.VideoCapture("video.mp4")
hasFrame, frame = cap.read()

In [None]:
# Output video: MJPG-encoded AVI at 10 frames per second, same frame size as
# the probe frame read from the input.
vid_writer = cv2.VideoWriter('output_fromYT.avi', cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame.shape[1], frame.shape[0]))

net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)

# Use CPU.
# The backend and the target are two separate settings: setPreferableBackend
# expects a DNN_BACKEND_* constant, setPreferableTarget a DNN_TARGET_* constant.
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

In [None]:
# Run the pose network on every frame of `cap`, draw the detected skeleton on
# the frame and append it to `vid_writer`.
try:
    while True:
        t = time.time()
        hasFrame, frame = cap.read()
        # Stop before touching `frame`: it is None once the stream has ended.
        if not hasFrame:
            break

        frameHeight, frameWidth = frame.shape[:2]

        inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
                                        (0, 0, 0), swapRB=False, crop=False)
        net.setInput(inpBlob)
        output = net.forward()

        # Height and width of the confidence maps produced by the network.
        H = output.shape[2]
        W = output.shape[3]

        # One entry per keypoint: (x, y) in frame coordinates, or None when the
        # probability does not exceed the threshold.
        points = []
        for i in range(nPoints):
            # Confidence map of the corresponding body part.
            probMap = output[0, i, :, :]

            # Global maximum of the confidence map and its location.
            minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)

            # Scale the location from map coordinates to the original frame.
            x = (frameWidth * point[0]) / W
            y = (frameHeight * point[1]) / H

            points.append((int(x), int(y)) if prob > threshold else None)

        # Draw the skeleton: a line plus both end points for every pair whose
        # two keypoints were detected.
        for partA, partB in POSE_PAIRS:
            if points[partA] is not None and points[partB] is not None:
                cv2.line(frame, points[partA], points[partB], (0, 255, 255), 3, lineType=cv2.LINE_AA)
                cv2.circle(frame, points[partA], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
                cv2.circle(frame, points[partB], 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)

        cv2.putText(frame, "time taken = {:.2f} sec".format(time.time() - t), (50, 50), cv2.FONT_HERSHEY_COMPLEX, .8, (255, 50, 0), 2, lineType=cv2.LINE_AA)

        vid_writer.write(frame)
finally:
    # Release both handles even when the loop is interrupted, so that the
    # output file is closed properly.
    cap.release()
    vid_writer.release()