In [1]:
import pandas as pd
import numpy as np
import re
import os
import cv2

### Set file path

In [2]:
# Input locations: one swing recording (video + Vicon CSV) and the shared calibration file.
data_dir = "data/1107"
swing_dir = "swing_1"
video_path = os.path.join(data_dir, swing_dir, "video.avi")

csv_path = os.path.join(data_dir, swing_dir, "pose_coord.csv")
cali_path = os.path.join(data_dir, "cali_out.npz")

# Inclusive, 1-based (first, last) frame ranges that are aligned against each other later on.
video_frames = (170, 349) # set the range of video frame, start at 1
csv_frames = (1, 405) # set the range of csv frame, start at 1

# Raw string: "\w" must reach the regex engine untouched; in a normal string literal it is an
# invalid escape sequence that newer Python versions warn about.
pattern = r"golf:(\w+)" # label pattern
usecols = 86 # csv columns about coordinates

# define three dots(vicon system) on the chessboard
a = [-88.5, 291.2, 1593.1] # y+ direction
b = [-94.4, 712.8, 1570.4] # origin
c = [1.8, 666.5, 734.7]   # x+ direction

### Parse CSV file
Missing coordinate values ("nan") are replaced with 0.

In [3]:
def parse_csv(filepath, usecols, pattern):
    """Read a trajectory CSV and return its frame rate, marker labels and per-frame coordinates.

    The file is read twice: once to pick the frame rate out of the first cell of the
    second line, and once (skipping the first two lines) to load the table whose header
    row carries the marker labels.

    Args:
        filepath: Path of the CSV file.
        usecols: Number of leading columns to load (columns ``0 .. usecols-1``).
        pattern: Regular expression (string or compiled) matched against each column
            name; group 1 of every match becomes a label.

    Returns:
        Tuple ``(fps, labels, keypoints)`` where ``fps`` is an int, ``labels`` is the
        list of captured label names in column order, and ``keypoints`` is a nested
        list indexed ``[frame][marker]`` holding ``[x, -y, -z]`` floats (the second and
        third components are negated relative to the file). Missing values become 0.

    Raises:
        ValueError: If the number of coordinate columns is not a multiple of 3.
    """
    fps = int(pd.read_csv(filepath, nrows=1).values[0][0])

    csv_df = pd.read_csv(filepath, skiprows=[0, 1], usecols=range(usecols))

    label_re = re.compile(pattern)
    matches = (label_re.match(col) for col in csv_df.columns.values)
    labels = [found[1] for found in matches if found]

    # The first two data rows and the first two columns are not coordinates.
    # Convert to float before filling so the fill works whatever dtype pandas inferred,
    # and build a new frame instead of mutating a slice of csv_df in place.
    coord_df = csv_df.iloc[2:, 2:].astype(float).fillna(0.0)

    n_frames, n_values = coord_df.shape
    if n_values % 3 != 0:
        raise ValueError(
            f"expected a multiple of 3 coordinate columns, got {n_values}"
        )
    if n_values == 0:
        return (fps, labels, [[] for _ in range(n_frames)])

    # Group the flat columns into (x, y, z) triples and flip the sign of y and z.
    axis_sign = np.array([1.0, -1.0, -1.0])
    coords = coord_df.to_numpy().reshape(n_frames, n_values // 3, 3) * axis_sign

    return (fps, labels, coords.tolist())

### Convert axis system
Convert coordinates from the Vicon system to the world (chessboard) system, using three reference dots measured on the chessboard.

In [4]:
def vicon_to_world_axis(base_dots, dots):
    """Express points in the axis system spanned by three reference dots.

    The axes are built from ``base_dots = (a, b, c)``: ``b`` is the origin,
    ``c - b`` is the x direction, ``a - b`` is the y direction and their cross
    product is the z direction. Each output coordinate is the signed length of
    the projection of ``dot - b`` onto the corresponding axis.

    Args:
        base_dots: Sequence whose first three items are the 3-D points ``a``, ``b``, ``c``.
        dots: Iterable of frames; each frame is a sequence of 3-D points.

    Returns:
        Nested list indexed ``[frame][dot]`` of ``[x, y, z]`` floats.

    Raises:
        ValueError: If the reference dots coincide or are collinear, so that at
            least one axis has zero length (the previous behaviour was to return NaN).
    """
    a = np.asarray(base_dots[0], dtype=float)
    b = np.asarray(base_dots[1], dtype=float)
    c = np.asarray(base_dots[2], dtype=float)

    x_vec = c - b
    y_vec = a - b
    z_vec = np.cross(x_vec, y_vec)

    axes = np.stack([x_vec, y_vec, z_vec])
    norms = np.linalg.norm(axes, axis=1)
    if not np.all(norms > 0):
        raise ValueError("base_dots must be three distinct, non-collinear points")

    # Rows are unit axis vectors, so a dot product with a row is the signed scalar
    # projection: no need to build the projected vector and recover its sign afterwards.
    unit_axes = axes / norms[:, np.newaxis]

    result = []
    for frame_dots in dots:
        # Handled per frame so frames may hold different numbers of dots.
        frame = np.asarray(frame_dots, dtype=float).reshape(-1, 3)
        result.append(((frame - b) @ unit_axes.T).tolist())

    return result

### Keypoints labels and edges
17 keypoints in total for the person, and 3 for the club (two shaft markers plus the estimated club head)

In [5]:
# Joint names keyed by their position in a per-frame person keypoint list.
# Entries marked "estimate" carry the same marking as in the original table.
label_person = dict(enumerate([
    "hip",         # estimate
    "r_hip",
    "r_knee",
    "r_ankle",
    "l_hip",
    "l_knee",
    "l_ankle",
    "spine",
    "neck",        # estimate
    "chin",        # estimate
    "head",
    "l_shoulder",
    "l_elbow",
    "l_wrist",
    "r_shoulder",
    "r_elbow",
    "r_wrist",
]))
label_club = dict(enumerate(["shaft1", "shaft2", "head"]))

# Skeleton bones as (start, end) index pairs into the tables above.
edges_person = [
    (0, 1), (1, 2), (2, 3),            # hip -> right leg
    (0, 4), (4, 5), (5, 6),            # hip -> left leg
    (0, 7), (7, 8), (8, 9), (9, 10),   # hip -> spine -> neck -> chin -> head
    (8, 11), (11, 12), (12, 13),       # neck -> left arm
    (8, 14), (14, 15), (15, 16),       # neck -> right arm
]
edges_club = [(0, 1), (1, 2)]

### Estimate the center keypoints

In [6]:
def estimate_keypoints(label_vicon, keypoints, club_head_offset=290):
    """Derive person joints and club points from raw marker coordinates.

    Most joints are the midpoint of two markers; ``hip``, ``neck`` and ``chin`` are
    midpoints of other joints; the ankles are single markers. The club head is placed
    ``club_head_offset`` units beyond ``club2`` along the ``club1 -> club2`` direction.

    Args:
        label_vicon: Marker names; position ``i`` names marker ``i`` of every frame.
            When a name occurs more than once the first occurrence is used.
        keypoints: Marker coordinates indexed ``[frame][marker][xyz]``.
        club_head_offset: Distance from ``club2`` to the estimated club head, in the
            same unit as the coordinates. Defaults to 290.

    Returns:
        Tuple ``(kps_person, kps_club)`` of nested lists. ``kps_person[frame]`` holds
        17 ``[x, y, z]`` joints ordered hip, r_hip, r_knee, r_ankle, l_hip, l_knee,
        l_ankle, spine, neck, chin, head, l_shoulder, l_elbow, l_wrist, r_shoulder,
        r_elbow, r_wrist. ``kps_club[frame]`` holds ``[shaft1, shaft2, club_head]``.

    Raises:
        ValueError: If a required marker name is absent from ``label_vicon``.
    """
    keypoints = np.asarray(keypoints, dtype=float)
    if len(keypoints) == 0:
        return ([], [])

    # Resolve each label once instead of scanning the label list for every frame.
    index_of = {}
    for position, name in enumerate(label_vicon):
        index_of.setdefault(name, position)

    required = (
        ["head1", "head2", "club1", "club2"]
        + [f"center{i}" for i in range(1, 7)]
        + [f"{side}_arm{i}" for side in ("left", "right") for i in range(1, 7)]
        + [f"{side}_leg{i}" for side in ("left", "right") for i in range(1, 4)]
    )
    missing = [name for name in required if name not in index_of]
    if missing:
        raise ValueError(f"marker labels not found: {missing}")

    def marker(name):
        # Coordinates of one marker across all frames.
        return keypoints[:, index_of[name]]

    def midpoint(first, second):
        return (marker(first) + marker(second)) / 2

    # person
    head = midpoint("head1", "head2")

    spine = midpoint("center1", "center2")
    r_hip = midpoint("center3", "center4")
    l_hip = midpoint("center5", "center6")
    hip = (r_hip + l_hip) / 2

    l_shoulder = midpoint("left_arm1", "left_arm2")
    r_shoulder = midpoint("right_arm1", "right_arm2")
    neck = (l_shoulder + r_shoulder) / 2

    chin = (head + neck) / 2

    l_elbow = midpoint("left_arm3", "left_arm4")
    r_elbow = midpoint("right_arm3", "right_arm4")

    l_wrist = midpoint("left_arm5", "left_arm6")
    r_wrist = midpoint("right_arm5", "right_arm6")

    l_knee = midpoint("left_leg1", "left_leg2")
    r_knee = midpoint("right_leg1", "right_leg2")

    l_ankle = marker("left_leg3")
    r_ankle = marker("right_leg3")

    kps_person = np.stack(
        [
            hip, r_hip, r_knee, r_ankle, l_hip,
            l_knee, l_ankle, spine, neck, chin,
            head, l_shoulder, l_elbow, l_wrist, r_shoulder,
            r_elbow, r_wrist,
        ],
        axis=1,
    )

    # club
    shaft1, shaft2 = marker("club1"), marker("club2")
    direction = shaft2 - shaft1
    length = np.linalg.norm(direction, axis=1, keepdims=True)
    # Coincident shaft markers give a zero-length direction; dividing by 1 there keeps
    # the club head at shaft2 instead of producing NaN.
    safe_length = np.where(length > 0, length, 1.0)
    club_head = shaft2 + direction / safe_length * club_head_offset

    kps_club = np.stack([shaft1, shaft2, club_head], axis=1)

    return (kps_person.tolist(), kps_club.tolist())

### Get total keypoints
- parse csv file
- (optional) convert from vicon to chessboard system
- estimate real joints

In [7]:
# Load the raw marker trajectories, then reduce them to person joints and club points.
fps, label_vicon, keypoints = parse_csv(csv_path, usecols=usecols, pattern=pattern)
# Optional step, currently disabled:
# keypoints = vicon_to_world_axis((a, b, c), keypoints)
person_kp, club_kp = estimate_keypoints(label_vicon, keypoints)

# One summary line per quantity.
print(
    f"Vicon FPS = {fps}",
    f"Total person keypoint frame = {len(person_kp)}",
    f"Total club keypoint frame = {len(club_kp)}",
    sep="\n",
)


Vicon FPS = 110
Total person keypoint frame = 405
Total club keypoint frame = 405


### Align csv frames and video frames based on index

In [8]:
# The configured ranges are 1-based and inclusive; turn them into 0-based bounds.
video_first, video_last = video_frames[0] - 1, video_frames[1] - 1
csv_first, csv_last = csv_frames[0] - 1, csv_frames[1] - 1

total_video_frames = video_last - video_first + 1
total_csv_frames = csv_last - csv_first + 1

# Start from every index in each range ...
video_frame_idxs = list(range(video_first, video_last + 1))
csv_frame_idxs = list(range(csv_first, csv_last + 1))

# ... then thin out the longer sequence with evenly spaced (truncated) indices so both
# lists end up with the same length.
if total_csv_frames > total_video_frames:
    csv_frame_idxs = np.linspace(
        csv_first, csv_last, num=total_video_frames, dtype=int
    ).tolist()
else:
    video_frame_idxs = np.linspace(
        video_first, video_last, num=total_csv_frames, dtype=int
    ).tolist()

print(f"csv_frame_idxs: \n{csv_frame_idxs} \nlen = {len(csv_frame_idxs)}\n")
print(f"video_frame_idxs: \n{video_frame_idxs} \nlen = {len(video_frame_idxs)}\n")


csv_frame_idxs: 
[0, 2, 4, 6, 9, 11, 13, 15, 18, 20, 22, 24, 27, 29, 31, 33, 36, 38, 40, 42, 45, 47, 49, 51, 54, 56, 58, 60, 63, 65, 67, 69, 72, 74, 76, 78, 81, 83, 85, 88, 90, 92, 94, 97, 99, 101, 103, 106, 108, 110, 112, 115, 117, 119, 121, 124, 126, 128, 130, 133, 135, 137, 139, 142, 144, 146, 148, 151, 153, 155, 157, 160, 162, 164, 167, 169, 171, 173, 176, 178, 180, 182, 185, 187, 189, 191, 194, 196, 198, 200, 203, 205, 207, 209, 212, 214, 216, 218, 221, 223, 225, 227, 230, 232, 234, 236, 239, 241, 243, 246, 248, 250, 252, 255, 257, 259, 261, 264, 266, 268, 270, 273, 275, 277, 279, 282, 284, 286, 288, 291, 293, 295, 297, 300, 302, 304, 306, 309, 311, 313, 315, 318, 320, 322, 325, 327, 329, 331, 334, 336, 338, 340, 343, 345, 347, 349, 352, 354, 356, 358, 361, 363, 365, 367, 370, 372, 374, 376, 379, 381, 383, 385, 388, 390, 392, 394, 397, 399, 401, 404] 
len = 180

video_frame_idxs: 
[169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 

### Load calibration result
Extrinsic and Intrinsic matrix

In [9]:
# np.load on an .npz archive keeps the file open until it is closed, so read everything
# that is needed inside a context manager.
with np.load(cali_path) as cali_info:
    # "img_dict" is converted to a dict and queried for the entry of the reference image;
    # that entry is used as the index into the per-image extrinsics.
    img_index = int(dict(cali_info["img_dict"])["base_cb.bmp"])

    rvecs, tvecs, mtx, dist = cali_info["rvecs"], cali_info["tvecs"], cali_info["mtx"], cali_info["dist"]

# Keep only the extrinsics that belong to the reference image.
rvecs, tvecs = rvecs[img_index], tvecs[img_index]

### Project 3D to 2D coordinates and render 2D skeleton on a single image

In [10]:
def render_skeleton(frame, person, club, rvecs, tvecs, mtx, dist, shift=15):
    """Project 3-D person and club keypoints into the image and draw them on ``frame``.

    Every keypoint is projected with ``cv2.projectPoints`` using the given extrinsics
    (``rvecs``, ``tvecs``) and intrinsics (``mtx``, ``dist``), moved ``shift`` pixels to
    the left, rounded, and drawn as a circle. Keypoints are then connected according to
    the module-level ``edges_person`` and ``edges_club``.

    Args:
        frame: Image to draw on; it is modified in place.
        person: Sequence of 3-D person keypoints.
        club: Sequence of 3-D club keypoints.
        rvecs, tvecs, mtx, dist: Passed unchanged to ``cv2.projectPoints``.
        shift: Horizontal offset in pixels subtracted from every projected x coordinate.

    Returns:
        The same ``frame`` object, with the skeleton drawn on it.
    """
    radius = 3
    line_width = 3
    person_color = (255, 165, 0)
    club_color = (102, 255, 230)

    def project_and_mark(points_3d, color):
        # Project each point on its own, mark it, and collect the pixel positions.
        pixels = []
        for xyz in points_3d:
            projected, _ = cv2.projectPoints(np.array(xyz), rvecs, tvecs, mtx, dist)
            u, v = projected[0][0][0], projected[0][0][1]
            pixel = (round(u - shift), round(v))
            pixels.append(pixel)
            cv2.circle(frame, pixel, radius, color, line_width)
        return pixels

    # All circles are drawn before any line, person before club.
    person_2d_kp = project_and_mark(person, person_color)
    club_2d_kp = project_and_mark(club, club_color)

    for first, second in edges_person:
        cv2.line(frame, person_2d_kp[first], person_2d_kp[second], person_color, thickness=line_width)

    for first, second in edges_club:
        cv2.line(frame, club_2d_kp[first], club_2d_kp[second], club_color, thickness=line_width)

    return frame
        

### Iterate video frames and csv frames
without shift

In [12]:
# Walk the selected video frames together with the keypoint frame drawn on each of them.
frame_pairs = zip(video_frame_idxs, csv_frame_idxs)
next_pair = next(frame_pairs, None)  # None when nothing is selected (no StopIteration)

cap = cv2.VideoCapture(video_path)
fourcc = cv2.VideoWriter_fourcc(*"mp4v") # mp4
# NOTE(review): the writer is opened for 1920x1200 frames at 20 fps; confirm that this
# matches the frames read from the source video.
out = cv2.VideoWriter(os.path.join(data_dir, swing_dir, "2d_no_shift.mp4"), fourcc, 20, (1920, 1200))

frame_no = 0

try:
    # The video is read sequentially from its first frame; frames that are not selected
    # are skipped, and reading stops once the last selected frame has been written.
    while next_pair is not None and cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("video read ending...")
            break

        cur_video, cur_csv = next_pair
        if frame_no == cur_video:
            rendered_frame = render_skeleton(
                frame, person_kp[cur_csv], club_kp[cur_csv],
                rvecs, tvecs, mtx, dist, 0
            )

            cv2.imshow("golf_swing", rendered_frame)
            # Named `key` rather than `c` so the chessboard reference point `c` defined in
            # the configuration cell is not overwritten.
            key = cv2.waitKey(500)
            if key & 0xFF == ord('q'):
                break

            out.write(rendered_frame)
            next_pair = next(frame_pairs, None)

        frame_no += 1
finally:
    # Release the capture, the writer and the preview window even if rendering fails.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

### Iterate video frames and csv frames
with shift=15

In [87]:
# Walk the selected video frames together with the keypoint frame drawn on each of them.
frame_pairs = zip(video_frame_idxs, csv_frame_idxs)
next_pair = next(frame_pairs, None)  # None when nothing is selected (no StopIteration)

cap = cv2.VideoCapture(video_path)
fourcc = cv2.VideoWriter_fourcc(*"mp4v") # mp4
# NOTE(review): the writer is opened for 1920x1200 frames at 20 fps; confirm that this
# matches the frames read from the source video.
out = cv2.VideoWriter(os.path.join(data_dir, swing_dir, "2d_shift.mp4"), fourcc, 20, (1920, 1200))

frame_no = 0

try:
    # The video is read sequentially from its first frame; frames that are not selected
    # are skipped, and reading stops once the last selected frame has been written.
    while next_pair is not None and cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("video read ending...")
            break

        cur_video, cur_csv = next_pair
        if frame_no == cur_video:
            # No explicit shift argument: render_skeleton's default shift applies.
            rendered_frame = render_skeleton(
                frame, person_kp[cur_csv], club_kp[cur_csv],
                rvecs, tvecs, mtx, dist
            )

            out.write(rendered_frame)
            next_pair = next(frame_pairs, None)

        frame_no += 1
finally:
    # Release the capture and the writer even if rendering fails.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

### Save selected frames as video

In [88]:
# Only the video indices matter here: the selected frames are copied without any overlay.
selected_frames = iter(video_frame_idxs)
cur_video = next(selected_frames, None)  # None when nothing is selected (no StopIteration)

cap = cv2.VideoCapture(video_path)
fourcc = cv2.VideoWriter_fourcc(*"mp4v") # mp4
# NOTE(review): the writer is opened for 1920x1200 frames at 20 fps; confirm that this
# matches the frames read from the source video.
out = cv2.VideoWriter(os.path.join(data_dir, swing_dir, "original.mp4"), fourcc, 20, (1920, 1200))

frame_no = 0

try:
    # The video is read sequentially from its first frame; frames that are not selected
    # are skipped, and reading stops once the last selected frame has been written.
    while cur_video is not None and cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("video read ending...")
            break

        if frame_no == cur_video:
            out.write(frame)
            cur_video = next(selected_frames, None)

        frame_no += 1
finally:
    # Release the capture and the writer even if reading or writing fails.
    cap.release()
    out.release()
    cv2.destroyAllWindows()