In [1]:
import cv2
import os
from ultralytics import  YOLO
import numpy as np

### Video_to_Img

In [2]:
def _video_to_img(video_path, img_dir, idx):
    """Dump every frame of ``{video_path}/IMG_{idx}.MOV`` into ``img_dir`` as JPEGs.

    Frames are written as ``{img_dir}/{idx}-{n}.jpg`` where ``n`` counts up
    from 0 in the order the frames are read.

    Args:
        video_path: Directory that contains the ``IMG_{idx}.MOV`` file.
        img_dir: Output directory; created (including parents) when missing.
        idx: Identifier used both in the video file name and as the prefix of
            every image file name.

    Returns:
        None. A message is printed when the video cannot be opened and when
        extraction has finished.
    """
    video_file = f"{video_path}/IMG_{idx}.MOV"
    os.makedirs(img_dir, exist_ok=True)

    video_capture = cv2.VideoCapture(video_file)
    if not video_capture.isOpened():
        # Previously this case was skipped without any feedback.
        print(f"Video {video_file} could not be opened.")
        video_capture.release()
        return None

    # NOTE(review): kept from the original; whether a file-backed capture
    # honours CAP_PROP_FPS depends on the OpenCV backend -- confirm it is needed.
    video_capture.set(cv2.CAP_PROP_FPS, 60)

    saved_frame_name = 0
    try:
        while True:
            frame_is_read, frame = video_capture.read()
            if not frame_is_read:
                # read() reports failure once no further frame can be decoded.
                break
            cv2.imwrite(f"{img_dir}/{idx}-{saved_frame_name}.jpg", frame)
            saved_frame_name += 1
    finally:
        # Always hand the decoder / file handle back, even if imwrite raises.
        video_capture.release()

    print(f"Video to image for {video_file} is done.")

### Inpaint Image

In [3]:
def _impaint_image(img_path, save_path):
    """Inpaint the blue-marked regions of an image and save the result.

    Pixels whose HSV value falls inside the blue range below form the mask;
    the mask is blurred and handed to ``cv2.inpaint`` (Telea method, radius 3).
    The result is written to ``{save_path}/{name}.jpg`` where ``name`` is the
    input file name up to its first dot.

    Args:
        img_path: Path of the image to read.
        save_path: Output directory; created (including parents) when missing.

    Returns:
        None. When the image cannot be read a message is printed and nothing
        is written.
    """
    os.makedirs(save_path, exist_ok=True)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None;
        # without this guard cvtColor would raise.
        print(f"Image {img_path} is not found.")
        return None
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    lower_blue = np.array([100, 50, 100])
    upper_blue = np.array([130, 255, 255])

    mask = cv2.inRange(hsv, lower_blue, upper_blue)
    # Blurring spreads non-zero values past the marker edge, so the inpainted
    # area is slightly larger than the thresholded region.
    mask = cv2.GaussianBlur(mask, (15, 15), 0)
    restored_img = cv2.inpaint(img, mask, 3, cv2.INPAINT_TELEA)

    # Same stem rule as before (text before the first dot of the file name).
    img_name = os.path.basename(img_path).split(".")[0]
    cv2.imwrite(f"{save_path}/{img_name}.jpg", restored_img)
    return None

### GetKeypoint and Clean data

In [4]:
def count_distance(x1, y1, x2, y2):
    """Return the Euclidean distance between the points (x1, y1) and (x2, y2)."""
    delta_x = x1 - x2
    delta_y = y1 - y2
    squared_length = delta_x**2 + delta_y**2
    return np.sqrt(squared_length)

In [5]:
def _get_keypoint(image_path, min_distance=30):
    """Locate the blue markers in an image and return their normalised centres.

    The image is thresholded in HSV space with the blue range below, external
    contours of the mask are extracted and the centroid of every contour with
    a non-zero area is computed (truncated to whole pixels).

    When the number of centroids is not exactly three, centroids that lie
    closer than ``min_distance`` pixels to a *later* centroid are dropped, so
    a marker split into several nearby blobs is reported only once.

    Args:
        image_path: Path of the image to analyse.
        min_distance: Pixel distance below which two centroids are treated as
            the same marker. Defaults to 30.

    Returns:
        A list of ``(x, y)`` tuples with ``x`` divided by the image width and
        ``y`` divided by the image height, or None when the image cannot be
        read.
    """
    img = cv2.imread(image_path)
    # The None check must come before any attribute access on ``img``;
    # cv2.imread returns None for a missing/unreadable file.
    if img is None:
        print(f"Image {image_path} is not found.")
        return None
    height, width = img.shape[:2]
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    lower_blue = np.array([100, 50, 100])
    upper_blue = np.array([130, 255, 255])
    mask = cv2.inRange(hsv, lower_blue, upper_blue)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Pixel centroids, computed once per contour; zero-area contours have no
    # defined centroid and are skipped.
    centroids = []
    for cnt in contours:
        M = cv2.moments(cnt)
        if M['m00'] == 0:
            continue
        centroids.append((int(M['m10'] / M['m00']), int(M['m01'] / M['m00'])))

    # clean the point that is too close
    if len(centroids) != 3:
        kept = []
        for i, (cx, cy) in enumerate(centroids):
            has_close_successor = any(
                count_distance(cx, cy, other_x, other_y) < min_distance
                for other_x, other_y in centroids[i + 1:]
            )
            if not has_close_successor:
                kept.append((cx, cy))
        # The de-duplicated list is what gets returned; previously the merge
        # result was computed but the unmerged contours were returned anyway.
        centroids = kept

    return [(cx / width, cy / height) for cx, cy in centroids]

  ### Tagged KeyPoint
  0: TaiYuan
  1: DaLing
  2: ShenMen
  3: YangGu
  4: YanChi
  5: YanShi

In [6]:
def _tag_by_location(image_path, hand_type, location, keypoints):
    if(hand_type == "left_back"):
        if(location == "left"):
            keypoints = sorted(keypoints, key=lambda x: x[1])
            return {
                "YangGu": keypoints[0],
                "YanChi": keypoints[1],
                "YanShi": keypoints[2]
            }
        elif(location == "right"):
            keypoints = sorted(keypoints, key=lambda x: x[1])
            return {
                "YangGu": keypoints[2],
                "YanChi": keypoints[1],
                "YanShi": keypoints[0]
            }
        elif(location == "up"):
            keypoints = sorted(keypoints, key=lambda x: x[0])
            return {
                "YangGu": keypoints[2],
                "YanChi": keypoints[1],
                "YanShi": keypoints[0]
            }
        elif(location == "down"):
            keypoints = sorted(keypoints, key=lambda x: x[0])
            return {
                "YangGu": keypoints[0],
                "YanChi": keypoints[1],
                "YanShi": keypoints[2]
            }
        else: return None
    elif(hand_type == "right_back"):
        if(location == "left"):
            keypoints = sorted(keypoints, key=lambda x: x[1])
            return {
                "YangGu": keypoints[2],
                "YanChi": keypoints[1],
                "YanShi": keypoints[0]
            }
        elif(location == "right"):
            keypoints = sorted(keypoints, key=lambda x: x[1])
            return {
                "YangGu": keypoints[0],
                "YanChi": keypoints[1],
                "YanShi": keypoints[2]
            }
        elif(location == "up"):
            keypoints = sorted(keypoints, key=lambda x: x[0])
            return {
                "YangGu": keypoints[0],
                "YanChi": keypoints[1],
                "YanShi": keypoints[2]
            }
        elif(location == "down"):
            keypoints = sorted(keypoints, key=lambda x: x[0])
            return {
                "YangGu": keypoints[2],
                "YanChi": keypoints[1],
                "YanShi": keypoints[0]
            }
        else: return None
    elif(hand_type == "left_front"):
        if(location == "left"):
            keypoints = sorted(keypoints, key=lambda x: x[1])
            return {
                "TaiYuan": keypoints[0],
                "DaLing": keypoints[1],
                "ShenMen": keypoints[2]
            }
        elif(location == "right"):
            keypoints = sorted(keypoints, key=lambda x: x[1])
            return {
                "TaiYuan": keypoints[2],
                "DaLing": keypoints[1],
                "ShenMen": keypoints[0]
            }
        elif(location == "up"):
            keypoints = sorted(keypoints, key=lambda x: x[0])
            return {
                "TaiYuan": keypoints[2],
                "DaLing": keypoints[1],
                "ShenMen": keypoints[0],
            }
        elif(location == "down"):
            keypoints = sorted(keypoints, key=lambda x: x[0])
            return {
                "TaiYuan": keypoints[0],
                "DaLing": keypoints[1],
                "ShenMen": keypoints[2],
            }
        else: return None
    elif(hand_type == "right_front"):
        if(location == "left"):
            keypoints = sorted(keypoints, key=lambda x: x[1])
            return {
                "TaiYuan": keypoints[2],
                "DaLing": keypoints[1],
                "ShenMen": keypoints[0]
            }
        elif(location == "right"):
            keypoints = sorted(keypoints, key=lambda x: x[1])
            return {
                "TaiYuan": keypoints[0],
                "DaLing": keypoints[1],
                "ShenMen": keypoints[2],
            }
        elif(location == "up"):
            keypoints = sorted(keypoints, key=lambda x: x[0])
            return {
                "TaiYuan": keypoints[0],
                "DaLing": keypoints[1],
                "ShenMen": keypoints[2]
            }
        elif(location == "down"):
            keypoints = sorted(keypoints, key=lambda x: x[0])
            return {
                "TaiYuan": keypoints[2],
                "DaLing": keypoints[1],
                "ShenMen": keypoints[0]
            }
        else: return None
    else: return None

In [7]:
def _consider_location_of_point(img_path, keypoints):
    # find the w, h of the image
    img = cv2.imread(img_path)
    h, w, _ = img.shape
    # up down left right
    location = []
    for keypoint in keypoints:
        x, y = keypoint
        if y < h / 2:
            location.append("up")
        else:
            location.append("down")
        if x < w / 2:
            location.append("left")
        else:
            location.append("right")
        
        if location.count("up") == 3:
            return "up"
        elif location.count("down") == 3:
            return "down"
        elif location.count("left") == 3:
            return "left"
        elif location.count("right") == 3:
            return "right"

In [8]:
# Class index written to the label files for every acupoint name; the index is
# the position of the name in this sequence.
ACUPOINT_ENCODE = {
    name: index
    for index, name in enumerate(
        ("TaiYuan", "DaLing", "ShenMen", "YangGu", "YanChi", "YanShi")
    )
}

In [9]:
# Detector instances keyed by weights file, so the weights are loaded once
# instead of once per processed image.
_DETECTOR_CACHE = {}


def _load_detector(weights='yolov8n.pt'):
    """Return the YOLO model for ``weights``, constructing it on first use only."""
    if weights not in _DETECTOR_CACHE:
        _DETECTOR_CACHE[weights] = YOLO(weights)
    return _DETECTOR_CACHE[weights]


def _find_class0_box(results, width, height):
    """Return the first class-0 box as normalised (x, y, w, h) floats, or None."""
    for result in results:
        for box in result.boxes:
            if int(box.cls[0]) == 0:
                x, y, w, h = (float(value) for value in box.xywh[0])
                return x / width, y / height, w / width, h / height
    return None


def _get_image_information(img_path, hand_type, save_path):
    """Build the label file for one image and return its tagged keypoints.

    Steps: run the detector on the image and take the first box of class 0,
    extract the marker keypoints, determine their location in the frame, name
    them for ``hand_type`` and write one line per keypoint to
    ``{save_path}/{name}.txt`` in the form
    ``class_index box_x box_y box_w box_h point_x point_y`` (box and point
    normalised by the image size). ``name`` is the image file name up to its
    first dot.

    Args:
        img_path: Path of the image to process.
        hand_type: Hand type forwarded to ``_tag_by_location``.
        save_path: Existing directory that receives the label file.

    Returns:
        The dict of tagged keypoints, or None when the image is unreadable,
        detection fails, no class-0 box is found, the number of keypoints is
        not three, or the keypoints cannot be tagged. No label file is
        written in any of the None cases.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising for unreadable files.
        print(f"Image {img_path} is not found.")
        return None
    height, width = img.shape[:2]

    model = _load_detector()
    try:
        results = model(img_path)
    except Exception as exc:
        # Report the real cause; the image itself was readable above.
        print(f"Detection failed for {img_path}: {exc}")
        return None

    box = _find_class0_box(results, width, height)
    if box is None:
        return None
    x, y, w, h = box

    keypoints = _get_keypoint(img_path)
    if keypoints is None:
        return None
    print(f"get {len(keypoints)} keypoints")
    # consider it is bad image --> delete it
    if len(keypoints) != 3:
        return None

    # tag the keypoints
    location = _consider_location_of_point(img_path, keypoints)
    print(f"location: {location}")
    tagged_keypoints = _tag_by_location(img_path, hand_type, location, keypoints)
    print(f"tagged keypoints: {tagged_keypoints}")
    if tagged_keypoints is None:
        # Unknown hand type or undecidable location: nothing to label.
        return None

    # save information into label text
    img_name = os.path.basename(img_path).split(".")[0]
    _save_path = f"{save_path}/{img_name}.txt"
    label_lines = [
        f"{ACUPOINT_ENCODE[key]} {x} {y} {w} {h} {point_x} {point_y}\n"
        for key, (point_x, point_y) in tagged_keypoints.items()
    ]
    # Single write; the file is only created once all lines are known.
    with open(_save_path, "w") as f:
        f.writelines(label_lines)
    return tagged_keypoints

In [10]:
# main
# For every hand type: extract the frames of all IMG_<idx>.MOV videos, build a
# label file for each usable frame and save an inpainted copy of that frame.
HAND_TYPE = ["left_back", "left_front", "right_front", "right_back"]
video_prefix = "datasets"
# _video_to_img rebuilds the file name as IMG_<idx>.MOV, so only files of
# exactly that shape can be processed.
VIDEO_NAME_PREFIX = "IMG_"
VIDEO_NAME_SUFFIX = ".MOV"
for t in HAND_TYPE:
    video_path = f"{video_prefix}/{t}"
    img_dir = f"{video_prefix}/{t}/images"
    label_dir = f"{video_prefix}/{t}/labels"
    if not os.path.isdir(video_path):
        print(f"Directory {video_path} does not exist, skipping.")
        continue
    os.makedirs(img_dir, exist_ok=True)
    os.makedirs(label_dir, exist_ok=True)

    # run all video in the video_path
    for video in sorted(os.listdir(video_path)):
        if not (video.startswith(VIDEO_NAME_PREFIX) and video.endswith(VIDEO_NAME_SUFFIX)):
            continue
        # Strip prefix and suffix so that IMG_<idx>.MOV round-trips exactly.
        idx = video[len(VIDEO_NAME_PREFIX):-len(VIDEO_NAME_SUFFIX)]
        _video_to_img(video_path, img_dir, idx)

    for img in sorted(os.listdir(img_dir)):
        # Frames are written as .jpg; ignore anything else in the directory.
        if not img.endswith(".jpg"):
            continue
        img_path = f"{img_dir}/{img}"
        checker = _get_image_information(img_path, t, label_dir)

        # No label could be produced for this frame: do not inpaint it.
        if checker is None:
            continue

        _impaint_image(img_path, f"{video_path}/impainted")


Video to image for datasets/left_back/IMG_3132.MOV is done.

image 1/1 /home/robin/project/1122_CSIE/acupoint/datasets/left_back/images/3132-610.jpg: 640x384 1 person, 68.6ms
Speed: 1.4ms preprocess, 68.6ms inference, 247.0ms postprocess per image at shape (1, 3, 640, 384)
get 3 keypoints
location: up
tagged keypoints: {'YangGu': (0.32407407407407407, 0.628125), 'YanChi': (0.2462962962962963, 0.5369791666666667), 'YanShi': (0.20277777777777778, 0.5109375)}

image 1/1 /home/robin/project/1122_CSIE/acupoint/datasets/left_back/images/3132-93.jpg: 640x384 1 person, 4.7ms
Speed: 1.4ms preprocess, 4.7ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 384)
get 3 keypoints
location: up
tagged keypoints: {'YangGu': (0.2962962962962963, 0.625), 'YanChi': (0.21203703703703702, 0.5364583333333334), 'YanShi': (0.1685185185185185, 0.5078125)}

image 1/1 /home/robin/project/1122_CSIE/acupoint/datasets/left_back/images/3132-3204.jpg: 640x384 1 person, 4.6ms
Speed: 1.3ms preprocess, 4.6ms i