In [126]:
import pandas as pd
import numpy as np
import re
import os
import cv2
import copy

### Set file path

In [127]:
data_dir = "data/1107"
swing_dir = "swing_1"
video_path = os.path.join(data_dir, swing_dir, "video.avi")

csv_path = os.path.join(data_dir, swing_dir, "pose_coord.csv")
cali_path = os.path.join(data_dir, "cali_out.npz")

video_frame_range = (169, 348) # set the range of video frame, start at 0
csv_frame_range = (0, 404) # set the range of csv frame, start at 0

pattern = "golf:(\w+)" # label pattern
usecols = 86 # csv columns about coordinates

auto_align = False

# define three dots(vicon system) on the chessboard
a = [-88.5, 291.2, 1593.1] # y+ direction
b = [-94.4, 712.8, 1570.4] # origin
c = [1.8, 666.5, 734.7]   # x+ direction

### Parse CSV file
replace "nan" with "0"

In [128]:
def parse_csv(filepath, usecols, pattern):
    fps, labels, keypoints = 0, [], []

    fps = int(pd.read_csv(filepath, nrows=1).values[0][0])

    csv_df = pd.read_csv(filepath, skiprows=[0, 1], usecols=range(usecols))
    pattern = re.compile(pattern)

    labels = [pattern.match(col)[1] for col in csv_df.columns.values if pattern.match(col)]
    
    coord_df = csv_df.iloc[2:, 2:]
    coord_df.fillna(value=0, inplace=True)
    coord_li = coord_df.values

    for i in range(len(coord_li)):
        coord = [[float(coord_li[i][j]), -float(coord_li[i][j+1]), -float(coord_li[i][j+2])] for j in range(0, len(coord_li[i]), 3)]
        keypoints.append(coord)

    return (fps, labels, keypoints)

### Convert axis system
from vicon to world system through three dots

In [129]:
def vicon_to_world_axis(base_dots, dots):
    a, b, c = np.asarray(base_dots[0]), np.asarray(base_dots[1]), np.asarray(base_dots[2])
    x_vec = c - b
    y_vec = a - b
    z_vec = np.cross(x_vec, y_vec)

    x_norm = np.linalg.norm(x_vec)
    y_norm = np.linalg.norm(y_vec)
    z_norm = np.linalg.norm(z_vec)

    result = []
    for frame_dots in dots:
        frame_result = []
        for dot in frame_dots:
            dot = np.asarray(dot)
            dot_vec = dot - b

            proj_x = (np.dot(dot_vec, x_vec) / x_norm**2) * x_vec
            proj_y = (np.dot(dot_vec, y_vec) / y_norm**2) * y_vec
            proj_z = (np.dot(dot_vec, z_vec) / z_norm**2) * z_vec

            x_coord = np.linalg.norm(proj_x)
            y_coord = np.linalg.norm(proj_y)
            z_coord = np.linalg.norm(proj_z)

            if not all(np.sign(x_vec)==np.sign(proj_x)): x_coord *= -1
            if not all(np.sign(y_vec)==np.sign(proj_y)): y_coord *= -1
            if not all(np.sign(z_vec)==np.sign(proj_z)): z_coord *= -1

            frame_result.append([x_coord, y_coord, z_coord])

        result.append(frame_result)

    return result

### Keypoints labels and edges
total 17 keypoints for person, and 2 for club

In [130]:
label_person = {
    0: "hip",  # estimate
    1: "r_hip", 
    2: "r_knee", 
    3: "r_ankle", 
    4: "l_hip", 
    5: "l_knee", 
    6: "l_ankle", 
    7: "spine", 
    8: "neck", # estimate
    9: "chin", # estimate
    10: "head", 
    11: "l_shoulder", 
    12: "l_elbow", 
    13: "l_wrist", 
    14: "r_shoulder", 
    15: "r_elbow", 
    16: "r_wrist"}
label_club = {0: "shaft1", 1: "shaft2", 2: "head"}

edges_person = [(0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6), (0, 7), (7, 8), (8, 9), (9, 10), (8, 11), (11, 12), (12, 13), (8, 14), (14, 15), (15, 16)]
edges_club = [(0, 1), (1, 2)]

### Estimate the center keypoints

In [131]:
def estimate_keypoints(label_vicon, keypoints):
    keypoints = np.asarray(keypoints)
    kps_person, kps_club = [], []
    for item in keypoints:
        # person 
        head = (item[label_vicon.index("head1")] + item[label_vicon.index("head2")]) / 2

        spine = (item[label_vicon.index("center1")] + item[label_vicon.index("center2")]) / 2
        r_hip = (item[label_vicon.index("center3")] + item[label_vicon.index("center4")]) / 2
        l_hip = (item[label_vicon.index("center5")] + item[label_vicon.index("center6")]) / 2
        hip = (r_hip + l_hip) / 2

        l_shoulder = (item[label_vicon.index("left_arm1")] + item[label_vicon.index("left_arm2")]) / 2
        r_shoulder = (item[label_vicon.index("right_arm1")] + item[label_vicon.index("right_arm2")]) / 2
        neck = (l_shoulder + r_shoulder) / 2

        chin = (head + neck) / 2

        l_elbow = (item[label_vicon.index("left_arm3")] + item[label_vicon.index("left_arm4")]) / 2
        r_elbow = (item[label_vicon.index("right_arm3")] + item[label_vicon.index("right_arm4")]) / 2

        l_wrist = (item[label_vicon.index("left_arm5")] + item[label_vicon.index("left_arm6")]) / 2
        r_wrist = (item[label_vicon.index("right_arm5")] + item[label_vicon.index("right_arm6")]) / 2

        l_knee = (item[label_vicon.index("left_leg1")] + item[label_vicon.index("left_leg2")]) / 2
        r_knee = (item[label_vicon.index("right_leg1")] + item[label_vicon.index("right_leg2")]) / 2

        l_ankle = item[label_vicon.index("left_leg3")]
        r_ankle = item[label_vicon.index("right_leg3")]

        kps_person.append([
            hip.tolist(), r_hip.tolist(), r_knee.tolist(), r_ankle.tolist(), l_hip.tolist(), 
            l_knee.tolist(), l_ankle.tolist(), spine.tolist(), neck.tolist(), chin.tolist(), 
            head.tolist(), l_shoulder.tolist(), l_elbow.tolist(), l_wrist.tolist(), r_shoulder.tolist(), 
            r_elbow.tolist(), r_wrist.tolist()
        ])

        # club
        shaft1 = (item[label_vicon.index("club1")]).tolist()
        shaft2 = (item[label_vicon.index("club2")]).tolist()

        unit_vector = (item[label_vicon.index("club2")] - item[label_vicon.index("club1")]) / np.linalg.norm(item[label_vicon.index("club2")] - item[label_vicon.index("club1")])
        club_head = item[label_vicon.index("club2")] + unit_vector *290 
        
        kps_club.append([shaft1, shaft2, club_head])

    return (kps_person, kps_club)

### Get total keypoints
- parse csv file
- (optional) convert from vicon to chessboard system
- estimate real joints

In [132]:
fps, label_vicon, keypoints = parse_csv(csv_path, usecols, pattern)
# keypoints = vicon_to_world_axis((a, b, c), keypoints)
person_kp, club_kp = estimate_keypoints(label_vicon, keypoints)

print(f"Vicon FPS = {fps}")
print(f"Total person keypoint frame = {len(person_kp)}")
print(f"Total club keypoint frame = {len(club_kp)}")


Vicon FPS = 110
Total person keypoint frame = 405
Total club keypoint frame = 405


### Load calibration result
Extrinsic and Intrinsic matrix

In [133]:
cali_info = np.load(cali_path)
img_index = int(dict(cali_info["img_dict"])["base_cb.bmp"])

rvecs, tvecs, mtx, dist = cali_info["rvecs"], cali_info["tvecs"], cali_info["mtx"], cali_info["dist"]
rvecs, tvecs = rvecs[img_index], tvecs[img_index]

### Render 2D skeleton on a single image

In [134]:
def render_skeleton(frame, person, club):
    person = tuple(person)
    club = tuple(club)

    size, p_color, c_color, thickness = 3, (255, 165, 0), (102, 255, 230), 3

    for coord in person:
        cv2.circle(frame, coord, size, p_color, thickness)
    
    for coord in club:
        cv2.circle(frame, coord, size, c_color, thickness)

    for edge in edges_person:
        start, end = edge
        cv2.line(frame, person[start], person[end], p_color, thickness=thickness)
    
    for edge in edges_club:
        start, end = edge
        cv2.line(frame, club[start], club[end], c_color, thickness=thickness)
    
    return frame
        

### Project 3D to 2D coordinates

In [135]:
def project_to_2d(person, club, shift=15):
    person_2d_kp = []
    club_2d_kp = []

    for coord in person:
        imgPoint, _ = cv2.projectPoints(np.array(coord), rvecs, tvecs, mtx, dist)
        point = [round(imgPoint[0][0][0]-shift), round(imgPoint[0][0][1])]
        person_2d_kp.append(point)        
    
    for coord in club:
        imgPoint, _ = cv2.projectPoints(np.array(coord), rvecs, tvecs, mtx, dist)
        point = [round(imgPoint[0][0][0]-shift), round(imgPoint[0][0][1])]
        club_2d_kp.append(point)
    
    return person_2d_kp, club_2d_kp

### Align csv frames and video frames based on index

In [136]:
video_frame_len = video_frame_range[1] - video_frame_range[0] + 1
csv_frame_len = csv_frame_range[1] - csv_frame_range[0] + 1

video_frame_idxs = [*range(video_frame_range[0], video_frame_range[1]+1)]
csv_frame_idxs = [*range(csv_frame_range[0], csv_frame_range[1]+1)]

if auto_align:

    if csv_frame_len > video_frame_len:
        selected_csv_frame_idxs = np.linspace(
                    csv_frame_range[0], csv_frame_range[1], 
                    num=video_frame_len,
                    endpoint=True, 
                    retstep=False, 
                    dtype=int
                ).tolist()
        selected_video_frame_idxs = video_frame_idxs
    else:
        selected_video_frame_idxs = np.linspace(
                    video_frame_range[0], video_frame_range[1], 
                    num=csv_frame_len,
                    endpoint=True, 
                    retstep=False, 
                    dtype=int
                ).tolist()
        selected_csv_frame_idxs = csv_frame_idxs
    
else:

    selected_dict = {}
    video_frame_li, csv_frame_p_li, csv_frame_c_li = [], [], []
    

    # load video frames from assigned indexs
    print("loading video frames ...")
    cap = cv2.VideoCapture(video_path)
    video_iterator = iter(video_frame_idxs)
    frame_no, cur_video = 0, next(video_iterator)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Exiting...")
            break
        if frame_no == cur_video:
            video_frame_li.append(frame)
        
            if cur_video == video_frame_idxs[-1]:
                break
            else:
                cur_video = next(video_iterator)

        frame_no += 1
    cap.release()
    print(f"loaded frame, count = {len(video_frame_li)}")


    # load csv frames from assigned indexs
    print("loading csv frames ...")
    csv_frame_p_li = person_kp[csv_frame_range[0]:csv_frame_range[1]+1]
    csv_frame_c_li = club_kp[csv_frame_range[0]:csv_frame_range[1]+1]


    # show the pair of video frames and csv frames
    video_ptr, csv_ptr = 0, 0
    while True:
        
        img = copy.deepcopy(video_frame_li[video_ptr])
        keypoint_3d_p = csv_frame_p_li[csv_ptr]
        keypoint_3d_c = csv_frame_c_li[csv_ptr]
        keypoint_2d_p, keypoint_2d_c = project_to_2d(keypoint_3d_p, keypoint_3d_c, 15)

        cur_video_idx = video_frame_range[0] + video_ptr
        cur_csv_idx = csv_frame_range[0] + csv_ptr

        # put skeleton into img
        rendered_frame = render_skeleton(
            img, keypoint_2d_p, keypoint_2d_c
        )
        
        # put index info into img
        rendered_frame = cv2.putText(rendered_frame, f"(video_index, ncsv_index) =  ({cur_video_idx}, {cur_csv_idx})", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,51), 2, cv2.LINE_AA)

        cv2.imshow("frame", rendered_frame)

        c = cv2.waitKey() & 0xFF
        if c == 13: # enter
            selected_dict[cur_video_idx] = cur_csv_idx
            video_ptr += 1
            csv_ptr += 1
        elif c == 82: # up
            video_ptr -= 1
        elif c == 84: # down
            video_ptr += 1
        elif c == 81: # left
            csv_ptr -= 1
        elif c == 83: # right
            csv_ptr += 1
        elif c == ord("q"):
            break

        if video_ptr >= len(video_frame_li):
            video_ptr = len(video_frame_li) - 1
        elif video_ptr < 0:
            video_ptr = 0

        if csv_ptr >= len(csv_frame_p_li):
            csv_ptr = len(csv_frame_p_li) - 1
        elif csv_ptr < 0:
            csv_ptr = 0
            
    cv2.destroyAllWindows()
    
    selected_csv_frame_idxs = []
    selected_video_frame_idxs = []
    for key in sorted(selected_dict.keys()):
        selected_video_frame_idxs.append(key)
        selected_csv_frame_idxs.append(selected_dict[key])

print(f"selected_video_frame_idxs:\nlen = {len(selected_video_frame_idxs)}\n{video_frame_idxs}\n")
print(f"selected_csv_frame_idxs:\nlen = {len(selected_csv_frame_idxs)}\n{csv_frame_idxs}\n")


loading video frames ...
loaded frame, count = 180
loading csv frames ...
selected_video_frame_idxs:
len = 174
[169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346

### Save seleted result as npz

In [137]:
seleted_kp_3d_p_li = []
seleted_kp_3d_c_li = []
seleted_kp_2d_p_li = []
seleted_kp_2d_c_li = []


for i in selected_csv_frame_idxs:
    seleted_kp_3d_p_li.append(person_kp[i])
    seleted_kp_3d_c_li.append(club_kp[i])

    kp_2d_p, kp_2d_c = project_to_2d(person_kp[i], club_kp[i])
    seleted_kp_2d_p_li.append(kp_2d_p)
    seleted_kp_2d_c_li.append(kp_2d_c)

result_path = os.path.join(data_dir, swing_dir, "skeleton.npz")
np.savez(result_path,
        video_frame_index=selected_video_frame_idxs,
        csv_frame_index=selected_csv_frame_idxs,
        keypoints_3d_person=seleted_kp_3d_p_li,
        keypoints_3d_club=seleted_kp_3d_c_li,
        keypoints_2d_person=seleted_kp_2d_p_li,
        keypoint_2d_club=seleted_kp_2d_c_li
        )
print(f"Saved npz file in {result_path}")


Saved npz file in data/1107/swing_1/skeleton.npz


### Save seleted result as mp4

In [138]:
original_video_path = os.path.join(data_dir, swing_dir, "original.mp4")
no_shift_video_path = os.path.join(data_dir, swing_dir, "2d_no_shift.mp4")
shift_video_path = os.path.join(data_dir, swing_dir, "2d_shift.mp4")

csv_iterator = iter(selected_csv_frame_idxs)
video_iterator = iter(selected_video_frame_idxs)

cap = cv2.VideoCapture(video_path)
fourcc = cv2.VideoWriter_fourcc(*"mp4v") # mp4
original_out = cv2.VideoWriter(original_video_path, fourcc, 20, (1920, 1200))
no_shift_out = cv2.VideoWriter(no_shift_video_path, fourcc, 20, (1920, 1200))
shift_out = cv2.VideoWriter(shift_video_path, fourcc, 20, (1920, 1200))

frame_no = 0
cur_video = next(video_iterator)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("video read ending...")
        break
    
    if frame_no == cur_video:
        cur_csv = next(csv_iterator)

        original_out.write(frame)

        kp_2d_p, kp_2d_c = project_to_2d(person_kp[cur_csv], club_kp[cur_csv], 0)
        rendered_frame_no_shift = render_skeleton(copy.deepcopy(frame), kp_2d_p, kp_2d_c)
        no_shift_out.write(rendered_frame_no_shift)

        kp_2d_p, kp_2d_c = project_to_2d(person_kp[cur_csv], club_kp[cur_csv], 15)
        rendered_frame_shift = render_skeleton(copy.deepcopy(frame), kp_2d_p, kp_2d_c)
        shift_out.write(rendered_frame_shift)

        # cv2.imshow("golf_swing", rendered_frame)
        # c = cv2.waitKey(500)
        # if c & 0xFF == ord('q'):
        #     break
        
        if cur_video == video_frame_idxs[-1]: 
            break
        else:
            cur_video = next(video_iterator)
        
    frame_no += 1

cap.release()
original_out.release()
no_shift_out.release()
shift_out.release()
cv2.destroyAllWindows()

print(f"Saved seleted video frames as mp4 in {original_video_path}")
print(f"Saved seleted video frames with no shifted skeleton as mp4 in {no_shift_video_path}")
print(f"Saved seleted video frames with shifted skeleton as mp4 in {shift_video_path}")

Saved seleted video frames as mp4 in data/1107/swing_1/original.mp4
Saved seleted video frames with no shifted skeleton as mp4 in data/1107/swing_1/2d_no_shift.mp4
Saved seleted video frames with shifted skeleton as mp4 in data/1107/swing_1/2d_shift.mp4
