In [None]:
import cv2
from typing import Any
import subprocess
import os
from cv2.typing import MatLike
from argparse import ArgumentParser
import numpy as np
from mirage.mirage_helpers import *
from mirage.pose_extract_base import MLAbstractInterface
from mirage.rgb_interface import CameraInterface
from mirage.movenet import MovenetInterface
from mirage.skeleton import SkeletonDetection

%matplotlib inline
from matplotlib import pyplot as plt

def showim(im):
    """Show a single BGR (OpenCV channel order) image inline via matplotlib."""
    # matplotlib expects RGB, so swap the channel order before drawing.
    rgb_view = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    plt.imshow(rgb_view)
    plt.show()

def show_images(images, cols = 1, titles = None):
    """Display a list of images in a single figure with matplotlib.

    Parameters
    ---------
    images: List of np.arrays. Arrays with a channel axis are treated as
            BGR (OpenCV order) and converted to RGB for display; 2-D
            arrays are drawn as grayscale without any colour conversion.

    cols (Default = 1): Number of columns in figure (number of rows is
                        set to np.ceil(n_images/float(cols))).

    titles: List of titles corresponding to each image. Must have
            the same length as images.
    """
    assert (titles is None) or (len(images) == len(titles))
    n_images = len(images)
    if n_images == 0:
        # Nothing to draw; scaling the figure by zero images would give an invalid size.
        return
    if titles is None:
        titles = ['Image (%d)' % i for i in range(1, n_images + 1)]

    n_rows = int(np.ceil(n_images / float(cols)))
    fig = plt.figure()
    for n, (image, title) in enumerate(zip(images, titles)):
        # add_subplot takes (nrows, ncols, index): `cols` is the column count.
        a = fig.add_subplot(n_rows, cols, n + 1)
        if image.ndim == 2:
            # Single-channel input: BGR2RGB would reject it, so draw it directly
            # with a per-axes gray colormap instead of changing the global default.
            a.imshow(image, cmap="gray")
        else:
            a.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        a.set_title(title)
    # Grow the figure with the number of images so each subplot stays legible.
    fig.set_size_inches(np.array(fig.get_size_inches()) * n_images)
    plt.show()

In [None]:


def process_and_viz_split(i_cam: CameraInterface, ml_interface: MLAbstractInterface) -> MatLike:
    """Process the next stacked frame as two independent views and visualise both.

    The frame from ``i_cam`` is split with ``split_image_stack``; each half is
    cropped around its tracked skeleton, run through ``ml_interface.predict``,
    and used to update the module-level trackers ``s_a`` / ``s_b``. The two
    annotated halves are returned re-joined by ``stack_image``.

    Relies on ``s_a`` and ``s_b`` existing at module level when called.
    """
    frame = i_cam.get_next_frame()
    half_a, half_b = split_image_stack(frame)

    # Crop regions come from the trackers' state before this frame's update.
    region_a = determine_crop_region(s_a, half_a.shape[0], half_a.shape[1])
    region_b = determine_crop_region(s_b, half_b.shape[0], half_b.shape[1])

    # NOTE(review): the single-frame debug cell in this notebook passes the
    # prediction through keypoint_to_original_image_space before updating the
    # tracker; this function does not. Confirm which behaviour is intended.
    keypoints_a = ml_interface.predict(half_a, region_a)
    keypoints_b = ml_interface.predict(half_b, region_b)

    # Second argument is one frame period (1 / fps).
    s_a.update_predictions(keypoints_a, 1.0 / i_cam.get_frame_rate_per_second())
    s_b.update_predictions(keypoints_b, 1.0 / i_cam.get_frame_rate_per_second())

    return stack_image(skeleton_to_image(half_a, s_a), skeleton_to_image(half_b, s_b))


def process_and_viz(i_cam: CameraInterface, ml_interface: MLAbstractInterface) -> MatLike:
    """Process the next frame as a single view and return it with the skeleton drawn.

    Runs ``ml_interface.predict`` on the whole frame (no crop region), feeds the
    result to the module-level tracker ``s_a`` and renders that tracker onto a
    copy of the frame via ``skeleton_to_image``.
    """
    frame = i_cam.get_next_frame()
    keypoints = ml_interface.predict(frame)
    # One frame period (1 / fps) is handed to the tracker with the prediction.
    seconds_per_frame = 1.0 / i_cam.get_frame_rate_per_second()
    s_a.update_predictions(keypoints, seconds_per_frame)
    return skeleton_to_image(frame, s_a)


In [None]:
# Run configuration for the cells below.
# Video opened by CameraInterface in the tracker/camera setup cell (path is relative to this notebook).
input_file = "../../Data/Reference_Parayno_Jeru.mp4"
# Frame index handed to i_cam.set_frame(...) before processing starts.
frame_number_start = 150
# Last frame of interest; a value of 0 is replaced by i_cam.get_total_frames() in the setup cell.
frame_number_end = 160
# NOTE(review): not read by any cell visible in this notebook section -- presumably a buffer for processed frames.
frames = []
# NOTE(review): not read by any cell visible in this notebook section -- presumably selects the split (two-view) path.
splitimage = True

In [None]:
# main: construct the model interface once; later cells call ml_interface.predict(...) on frames.

ml_interface = MovenetInterface()



In [None]:
def determine_crop_region(skele: SkeletonDetection, image_height, image_width) -> dict[str, float]:
    """Pick the square region of the image on which to run the next inference.

    Uses the tracked skeleton to build a square, centred between joints 11 and
    12, that encloses the whole body of the target person. Falls back to
    ``default_crop_region`` (the full image padded to square) when the torso is
    not confidently visible or the computed square is larger than the image.
    Modified from the MoveNet tutorial.

    Returns a dict with pixel-space ``y_min``, ``x_min``, ``height``, ``width``
    and ``clh`` (the half side length of the square).
    """
    if not torso_visible(skele):
        return default_crop_region(image_height, image_width)

    # Midpoint of joints 11 and 12 (presumably the hips in MoveNet keypoint
    # order -- confirm), still in ratio space at this point.
    first_xy = skele.joints[11].current_xy
    second_xy = skele.joints[12].current_xy
    center_y = (first_xy[1] + second_xy[1]) / 2
    center_x = (first_xy[0] + second_xy[0]) / 2

    torso_y, torso_x, body_y, body_x = determine_torso_and_body_range(
        skele, center_y, center_x, image_width, image_height
    )

    # from ratio to resolution space
    body_y_px = body_y * image_height
    body_x_px = body_x * image_width
    torso_y_px = torso_y * image_height
    torso_x_px = torso_x * image_width
    center_x_px = center_x * image_width
    center_y_px = center_y * image_height

    # Largest doubled extent decides the half side length of the square ...
    doubled_extents = [torso_x_px * 2, torso_y_px * 2, body_x_px * 2, body_y_px * 2]
    half_side = np.amax(doubled_extents)
    # ... capped by the farthest distance from the centre to an image edge.
    edge_distances = np.array(
        [center_x_px, image_width - center_x_px, center_y_px, image_height - center_y_px]
    )
    half_side = np.amin([half_side, np.amax(edge_distances)])

    top = center_y_px - half_side
    left = center_x_px - half_side

    # A square wider than the image's long side is no tighter than the default.
    if half_side > max(image_width, image_height) / 2:
        return default_crop_region(image_height, image_width)

    side = half_side * 2
    return {
        "y_min": top,
        "x_min": left,
        # Both extents equal the side length (up to float rounding).
        "height": (top + side) - top,
        "width": (left + side) - left,
        "clh": half_side,
    }

In [None]:
def k_coord(image: MatLike, keypoint: tuple[int, int, int]) -> tuple[int, int]:
    """Convert a normalised ``(y, x, ...)`` keypoint into integer ``(x, y)`` pixel coordinates.

    Only ``keypoint[0]`` (y) and ``keypoint[1]`` (x) are read, so callers may
    pass tuples of length two or more holding fractional values. The result is
    truncated with ``int`` and ordered ``(x, y)``.
    """
    height, width = image.shape[0], image.shape[1]
    aspect = width / height
    # assuming landscape.. and padded to be square. TODO: make more robust or informed.
    # The offset is negative for landscape frames: it removes the top padding of
    # the square the y value was normalised against.
    vertical_offset = (height - width) / 2
    norm_y: float = keypoint[0]
    norm_x: float = keypoint[1]
    pixel_x = int(norm_x * width)
    pixel_y = int(norm_y * height * aspect + vertical_offset)
    return (pixel_x, pixel_y)


def skeleton_to_image(image: MatLike, skele: SkeletonDetection, min_confidence: float = 0.2):
    """Return a copy of ``image`` with the skeleton's joints and edges drawn on it.

    A circle is drawn for every joint flagged ``display`` whose confidence is
    above ``min_confidence``. A line is drawn for every ``KeypointEdges`` entry
    whose two joints are both flagged ``display``; edges are not filtered by
    confidence. Positions are read from ``joint.estimate`` (index 2 as y,
    index 0 as x) and mapped to pixels with ``k_coord``. The input image is
    left untouched.
    """
    canvas: MatLike = image.copy()

    # Joint markers.
    for _, joint in skele.joints.items():
        if not joint.display:
            continue
        norm_y: float = joint.estimate[2]
        norm_x: float = joint.estimate[0]
        score: float = joint.confidence
        if score > min_confidence:
            center = k_coord(canvas, (norm_y, norm_x, 1))
            canvas = cv2.circle(canvas, center, radius=5, color=joint.color, thickness=2)

    # Connecting edges; each key is a pair of joint ids, each value the line colour.
    for joint_pair, edge_color in KeypointEdges.items():
        start_joint = skele.joints[joint_pair[0]]
        if not start_joint.display:
            continue
        end_joint = skele.joints[joint_pair[1]]
        if not end_joint.display:
            continue
        start_point = k_coord(canvas, (start_joint.estimate[2], start_joint.estimate[0]))
        end_point = k_coord(canvas, (end_joint.estimate[2], end_joint.estimate[0]))
        canvas = cv2.line(canvas, start_point, end_point, edge_color, 5)

    return canvas

def keypoint_to_original_image_space(keypoints: np.ndarray, image: MatLike, crop_region: dict[str, int] | None = None
) -> np.ndarray:
    if crop_region is None: 
        return keypoints
    original_height, original_width, channels = image.shape
    padd = (original_width - original_height)
    for i in range(len(keypoints)):
        keypoints[i][1] = (crop_region["x_min"] + (keypoints[i][1] * float(crop_region["width"]))) / original_width
        keypoints[i][0] = (crop_region["y_min"] + (padd/2) + (keypoints[i][0] * float(crop_region["height"]))) / (original_height + padd)
    return keypoints

In [None]:
# Fresh skeleton trackers; process_and_viz_split / process_and_viz and the
# debug cell below read these module-level names directly.
s_a = SkeletonDetection()
s_b = SkeletonDetection()
# Open the configured video and hand the start frame index to the camera interface.
i_cam = CameraInterface(input_file)
i_cam.set_frame(frame_number_start)
# An end frame of 0 means "use the total frame count reported by the camera interface".
frame_number_end = frame_number_end if frame_number_end != 0 else i_cam.get_total_frames()
##### reset
# NOTE(review): presumably re-running this cell is how the trackers and the video position are reset -- confirm.

In [None]:
# Single-frame walkthrough of the split pipeline for view A, showing each intermediate image.
img = i_cam.get_next_frame()
img_a, img_b = split_image_stack(img)
# Crop regions are derived from the trackers' current state (before this frame's update).
a_crop = determine_crop_region(s_a, img_a.shape[0], img_a.shape[1])
# b_crop is computed for symmetry but is not used further in this cell.
b_crop = determine_crop_region(s_b, img_b.shape[0], img_b.shape[1])

print(a_crop)
if a_crop is not None:
    # Cut out the crop for display only; the crop dict values are truncated to ints here.
    a_crop_img = crop_image(img_a, int(a_crop["y_min"]),
                    int(a_crop["height"]),
                    int(a_crop["x_min"]),
                    int(a_crop["width"]),
                    0,)
else:
    # No crop available: show an all-zero placeholder with the same shape as the view.
    a_crop_img = img_a * 0
# predict receives the full view plus the crop region; its result is then mapped
# back to the original image space before it is fed to the tracker.
img_a_kp_raw = ml_interface.predict(img_a, a_crop)
img_a_kp = keypoint_to_original_image_space(img_a_kp_raw, img_a, a_crop)
# Second argument is one frame period (1 / fps).
s_a.update_predictions(img_a_kp, (1.0 / i_cam.get_frame_rate_per_second()))
img_a_viz = skeleton_to_image(img_a, s_a)

# Show the input view, the crop and the annotated result together (second argument is `cols`).
show_images([img_a, a_crop_img, img_a_viz], 2, ["original image", "crop used", "Estimated points"])
