# Labelling golf club

In [1]:
""" ###Requirements###
: numpy
: matplotlib
: torch
: opencv-python
: torchvision
: pandas
: pillow
"""

' ###Requirements###\n: numpy\n: matplotlib\n: torch\n: opencv-python\n: torchvision\n: pandas\n: pillow\n'

In [1]:
"""Generate body keypoints using pretrained model"""

import torch
import torchvision
import numpy as np
import cv2
import argparse
from PIL import Image
from torchvision.transforms import transforms as transforms
import pandas as pd
import os
from pathlib import Path
import matplotlib



# Column names for one row of frame metadata: the video and frame numbers
# followed by an (x, y) column pair for each of the 17 keypoints, in the same
# order in which the per-frame loop appends the coordinates.
columns_list = [
    'video_number', 'frame_number',
    'nose_x', 'nose_y',
    'left_eye_x', 'left_eye_y',
    'right_eye_x', 'right_eye_y',
    'left_ear_x', 'left_ear_y',
    'right_ear_x', 'right_ear_y',
    'left_shoulder_x', 'left_shoulder_y',
    'right_shoulder_x', 'right_shoulder_y',
    'left_elbow_x', 'left_elbow_y',
    'right_elbow_x', 'right_elbow_y',
    'left_wrist_x', 'left_wrist_y',
    'right_wrist_x', 'right_wrist_y',
    'left_hip_x', 'left_hip_y',
    'right_hip_x', 'right_hip_y',
    'left_knee_x', 'left_knee_y',
    'right_knee_x', 'right_knee_y',
    'left_ankle_x', 'left_ankle_y',
    'right_ankle_x', 'right_ankle_y',
]

# Pairs of keypoint row indices that are joined by a line when the skeleton
# is drawn.
edges_raw = [
    (0, 1), (0, 2), (2, 4), (1, 3), (6, 8), (8, 10),
    (5, 7), (7, 9), (5, 11), (11, 13), (13, 15), (6, 12),
    (12, 14), (14, 16), (5, 6),
]
def generate_keypoints(outputs):
    """Return the (x, y) keypoints of the first confident detection.

    Args:
        outputs: list whose first element is a dict with a 'keypoints' entry
            (one tensor per detection, reshaped here into rows of three
            values) and a 'scores' entry (one score per detection).

    Returns:
        An integer NumPy array with one (x, y) row per keypoint for the first
        detection whose score is above 0.9, or None when no detection passes
        the threshold. Callers must handle the None case.
    """
    detections = outputs[0]
    for instance_keypoints, score in zip(detections['keypoints'],
                                         detections['scores']):
        # Check the score first so low-confidence detections are never
        # copied off the device.
        if score > 0.9:
            rows = instance_keypoints.cpu().detach().numpy().reshape(-1, 3)
            # Keep the x and y columns, drop the third value of each row, and
            # truncate to integers in one vectorised cast.
            return rows[:, :2].astype(int)
    return None
def draw_keypoints(keypoints, image_input_path, keypoint_numbers = False):
    """Draw keypoints and the connecting skeleton on an image loaded from disk.

    Args:
        keypoints: NumPy array with one row per keypoint, column 0 holding x
            and column 1 holding y. Exactly 17 rows means body keypoints
            only; any other row count is treated as also containing two club
            points at rows 17 and 18.
        image_input_path: path of the image to draw on.
        keypoint_numbers: when True, write each keypoint's row index next to
            its marker.

    Returns:
        The image as a float32 BGR array whose pixel values were divided by
        255, with the markers and edges drawn on it.
    """
    image = Image.open(image_input_path).convert('RGB')
    # NumPy copy of the image for OpenCV functions
    image_input = np.array(image, dtype=np.float32)
    # convert the NumPy image to OpenCV BGR format
    image_input = cv2.cvtColor(image_input, cv2.COLOR_RGB2BGR) / 255.

    for p in range(keypoints.shape[0]):
        centre_x, centre_y = int(keypoints[p, 0]), int(keypoints[p, 1])
        # draw the keypoint marker
        cv2.circle(image_input, (centre_x, centre_y), 3, (0, 0, 255),
                   thickness=-1, lineType=cv2.FILLED)
        if keypoint_numbers:
            cv2.putText(image_input, f"{p}",
                        (int(keypoints[p, 0]+10), int(keypoints[p, 1]-5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

    # Work on a copy: appending to edges_raw itself would add one more
    # (17, 18) edge to the shared list on every call that includes the club.
    edges = list(edges_raw)
    if len(keypoints) != 17:  # includes club
        edges.append((17, 18))

    for ie, (start, end) in enumerate(edges):
        # spread the hue over the edges so each one gets its own colour
        # NOTE(review): colours are on a 0-255 scale although the image was
        # divided by 255 - confirm this is intended.
        rgb = matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0]) * 255
        # join the keypoint pair to draw the skeletal structure
        cv2.line(image_input,
                 (int(keypoints[start, 0]), int(keypoints[start, 1])),
                 (int(keypoints[end, 0]), int(keypoints[end, 1])),
                 tuple(rgb.tolist()), 2, lineType=cv2.LINE_AA)
    return image_input


# Dataset folder layout, rooted at the current working directory.
cwd = str(Path(os.getcwd()))
pose_dataset_path = cwd + r'\PoseDataset'
basic_frames_path = pose_dataset_path + r'\0_basic_frames'
body_labelled_frames_path = pose_dataset_path + r'\1_body_labelled_frames'
club_labelled_frames_path = pose_dataset_path + r'\2_club_labelled_frames'

# Create whichever of the folders is missing, parent folder first.
for _folder in (pose_dataset_path,
                basic_frames_path,
                body_labelled_frames_path,
                club_labelled_frames_path):
    if not os.path.exists(_folder):
        os.makedirs(_folder)

    
# Use the GPU when CUDA is available, otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Preprocessing pipeline: a single step that converts the image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# Pretrained Keypoint R-CNN (ResNet-50 FPN) configured for 17 keypoints.
model = torchvision.models.detection.keypointrcnn_resnet50_fpn(
    pretrained=True,
    num_keypoints=17,
)
# Move the model onto the computation device and switch it to eval mode.
model.to(device)
model.eval()

# Extract frames from each input video, run the keypoint model on every
# frame, save the raw and the body-labelled frame, and collect the keypoints
# of all frames into one CSV file.
input_videos_folder_path =  cwd + r'\input_videos'

video_numbers = [0, 1]
# Only this many frames are processed per video.
max_frames_per_video = 3
frames_metadata = []
for video_number in video_numbers:

    input_video_path = input_videos_folder_path + r'\video' + str(video_number) + '.mp4'
    vidcap = cv2.VideoCapture(input_video_path)
    success, image = vidcap.read()
    frame_number = 0

    while success:
        frame_filename = r'\frame_' + str(video_number) + '_' + str(frame_number) + '.jpg'
        basic_save_path = basic_frames_path + frame_filename
        print(basic_save_path)
        cv2.imwrite(basic_save_path, image)     # save frame as JPEG file

        # Re-open the saved JPEG as RGB so the model sees exactly the image
        # that is stored on disk.
        image = Image.open(basic_save_path).convert('RGB')
        # transform the image and add a batch dimension
        image = transform(image).unsqueeze(0).to(device)
        with torch.no_grad():
            outputs = model(image)
        keypoints = generate_keypoints(outputs)

        if keypoints is None:
            # generate_keypoints returns None when no detection clears its
            # score threshold; such a frame gets no metadata row and no
            # labelled image.
            print("No confident detection, skipping", basic_save_path)
        else:
            # Row layout: video number, frame number, then x0, y0, x1, y1, ...
            frame_metadata = [video_number, frame_number]
            frame_metadata.extend(keypoints.flatten().tolist())
            frames_metadata.append(frame_metadata)

            labelled_image = draw_keypoints(keypoints, basic_save_path)
            labelled_image_path = body_labelled_frames_path + frame_filename
            # draw_keypoints returns an image divided by 255, so scale it
            # back before writing.
            cv2.imwrite(labelled_image_path, labelled_image*255.)

        ##############
        frame_number += 1
        success, image = vidcap.read()
        if frame_number == max_frames_per_video:
            break

    # Free the video handle before moving on to the next file.
    vidcap.release()

df = pd.DataFrame(frames_metadata, columns = columns_list)
df_filepath = body_labelled_frames_path + r'\data.csv'
df.to_csv(df_filepath)

C:\Users\James\git\GolfAI\PoseDataset\0_basic_frames\frame_0_0.jpg
C:\Users\James\git\GolfAI\PoseDataset\0_basic_frames\frame_0_1.jpg
C:\Users\James\git\GolfAI\PoseDataset\0_basic_frames\frame_0_2.jpg
C:\Users\James\git\GolfAI\PoseDataset\0_basic_frames\frame_1_0.jpg
C:\Users\James\git\GolfAI\PoseDataset\0_basic_frames\frame_1_1.jpg
C:\Users\James\git\GolfAI\PoseDataset\0_basic_frames\frame_1_2.jpg


In [1]:
"""Label keypoints on image"""

import random
import cv2
import pandas as pd
import os
from pathlib import Path
import numpy as np
from PIL import Image
import matplotlib

def click_event(event, x, y, flags, params):
    """Mouse callback that records where a left or right button press occurred.

    Appends [x, y] (both converted to int) to the module-level list
    golf_club_coordinates. Every other event type is ignored, and flags and
    params are not used.
    """
    if event not in (cv2.EVENT_LBUTTONDOWN, cv2.EVENT_RBUTTONDOWN):
        return
    clicked_point = [int(x), int(y)]
    golf_club_coordinates.append(clicked_point)

def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize an image to a target width or height, keeping its aspect ratio.

    Args:
        image: array whose first two shape entries are (height, width).
        width: target width; takes precedence over height when both are given.
        height: target height; only used when width is None.
        inter: interpolation flag handed to cv2.resize.

    Returns:
        The input image itself when neither width nor height is given,
        otherwise the result of cv2.resize.
    """
    src_h, src_w = image.shape[:2]

    if width is not None:
        scale = width / float(src_w)
        target_size = (width, int(src_h * scale))
    elif height is not None:
        scale = height / float(src_h)
        target_size = (int(src_w * scale), height)
    else:
        return image

    return cv2.resize(image, target_size, interpolation=inter)

# Pairs of keypoint row indices that are joined by a line when the skeleton
# is drawn.
edges_raw = [
    (0, 1), (0, 2), (2, 4), (1, 3), (6, 8), (8, 10),
    (5, 7), (7, 9), (5, 11), (11, 13), (13, 15), (6, 12),
    (12, 14), (14, 16), (5, 6)]

def draw_keypoints(keypoints, image_input_path, keypoint_numbers = False):
    """Draw keypoints and the connecting skeleton on an image loaded from disk.

    This is a copy of the function used when generating the body keypoints.

    Args:
        keypoints: NumPy array with one row per keypoint, column 0 holding x
            and column 1 holding y. Exactly 17 rows means body keypoints
            only; any other row count is treated as also containing two club
            points at rows 17 and 18.
        image_input_path: path of the image to draw on.
        keypoint_numbers: when True, write each keypoint's row index next to
            its marker.

    Returns:
        The image as a float32 BGR array whose pixel values were divided by
        255, with the markers and edges drawn on it.
    """
    image = Image.open(image_input_path).convert('RGB')
    # NumPy copy of the image for OpenCV functions
    image_input = np.array(image, dtype=np.float32)
    # convert the NumPy image to OpenCV BGR format
    image_input = cv2.cvtColor(image_input, cv2.COLOR_RGB2BGR) / 255.

    for p in range(keypoints.shape[0]):
        centre_x, centre_y = int(keypoints[p, 0]), int(keypoints[p, 1])
        # draw the keypoint marker
        cv2.circle(image_input, (centre_x, centre_y), 3, (0, 0, 255),
                   thickness=-1, lineType=cv2.FILLED)
        if keypoint_numbers:
            cv2.putText(image_input, f"{p}",
                        (int(keypoints[p, 0]+10), int(keypoints[p, 1]-5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

    # Work on a copy: appending to edges_raw itself would add one more
    # (17, 18) edge to the shared list on every call that includes the club,
    # which also shifts the colour assigned to every edge.
    edges = list(edges_raw)
    if len(keypoints) != 17:  # includes club
        edges.append((17, 18))

    for ie, (start, end) in enumerate(edges):
        # spread the hue over the edges so each one gets its own colour
        # NOTE(review): colours are on a 0-255 scale although the image was
        # divided by 255 - confirm this is intended.
        rgb = matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0]) * 255
        # join the keypoint pair to draw the skeletal structure
        cv2.line(image_input,
                 (int(keypoints[start, 0]), int(keypoints[start, 1])),
                 (int(keypoints[end, 0]), int(keypoints[end, 1])),
                 tuple(rgb.tolist()), 2, lineType=cv2.LINE_AA)
    return image_input

# Dataset folder layout, rooted at the current working directory.
cwd = str(Path(os.getcwd()))
pose_dataset_path = cwd + r'\PoseDataset'
basic_frames_path = pose_dataset_path + r'\0_basic_frames'
body_labelled_frames_path = pose_dataset_path + r'\1_body_labelled_frames'
club_labelled_frames_path = pose_dataset_path + r'\2_club_labelled_frames'

# Input keypoint table and output metadata table.
df_filepath = body_labelled_frames_path + r'\data.csv'
df_metadata_filepath = club_labelled_frames_path + r'\metadata.csv'

# Load the keypoint table and discard its "Unnamed: 0" column.
df_loaded = pd.read_csv(df_filepath).drop(columns="Unnamed: 0")

# Everything in the body-labelled folder except the CSV is a frame to label;
# the frames are visited in random order.
frames_list = os.listdir(body_labelled_frames_path)
frames_list.remove('data.csv')
random.shuffle(frames_list)

image_width = 500
frame_number = 0

# Copy of the keypoint table with the four club columns preset to -1.
df_metadata = df_loaded.assign(
    grip_x=-1,
    grip_y=-1,
    clubhead_x=-1,
    clubhead_y=-1,
)

# Interactive labelling: for every frame the user clicks two club points
# (first click is stored as the grip, second as the club head), sees the
# result drawn on the frame, and then accepts (a), deletes (d) or redraws (r).
for frame_filename in frames_list:
    filename =  r'\f' + frame_filename[1:]
    image_path_unlabelled = basic_frames_path + filename
    image_path_body = body_labelled_frames_path + filename
    image_path_output = club_labelled_frames_path + filename

    # reading the image
    img_unsized = cv2.imread(image_path_body, 1)
    if img_unsized is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # read as an image.
        print("Could not read image, skipping", filename)
        continue
    # Clicks are made on the resized image, so keep the factor needed to map
    # them back to the original resolution.
    width_scale_factor = img_unsized.shape[1]/image_width
    img = ResizeWithAspectRatio(img_unsized, width=image_width)

    ## Getting keypoints from df_loaded
    # File names look like frame_<video number>_<frame number>.jpg
    temp_indexing = frame_filename[6:][:-4].split("_")
    video_number_index = int(temp_indexing[0])
    frame_number_index = int(temp_indexing[1])

    row_wanted = df_loaded[(df_loaded.frame_number == frame_number_index) & (df_loaded.video_number == video_number_index)].iloc[0]
    # Columns after the two identifiers alternate x, y; regroup as [x, y] rows.
    keypoints = np.array(row_wanted.values[2:]).reshape(-1, 2)

    loop = True
    while loop:
        golf_club_coordinates = []

        # displaying the image
        cv2.imshow('image', img)

        # setting mouse handler for the image
        # and calling the click_event() function
        cv2.setMouseCallback('image', click_event)

        # Poll until two clicks were recorded. "< 2" rather than "!= 2" so
        # the loop still ends if extra clicks arrive between two polls.
        while len(golf_club_coordinates) < 2:
            cv2.waitKey(1)

        # close the window
        cv2.destroyAllWindows()

        # Map the first two clicks back to the original image resolution.
        club_coordinates_scaled = np.multiply(np.array(golf_club_coordinates[:2]), width_scale_factor)
        club_coordinates_scaled_int = club_coordinates_scaled.astype(int)

        keypoints_with_club = np.vstack([keypoints, club_coordinates_scaled_int])
        output_image = draw_keypoints(keypoints_with_club, image_path_unlabelled)

        output_image_scaled =  ResizeWithAspectRatio(output_image, width=image_width)
        cv2.imshow('image', output_image_scaled)
        # Mask to the low byte so extra high bits in the returned key code
        # do not break the comparisons below.
        key = cv2.waitKey(0) & 0xFF
        cv2.destroyAllWindows()

        # Rows of the metadata table that belong to this frame.
        row_mask = (df_metadata['frame_number'] == frame_number_index) & (df_metadata['video_number'] == video_number_index)

        if key == ord('a'): # a : accept
            loop = False
            print("Accepted into dataset", filename)
            cv2.imwrite(image_path_output, output_image*255.)
            club_columns = ('grip_x', 'grip_y', 'clubhead_x', 'clubhead_y')
            for column, value in zip(club_columns, club_coordinates_scaled_int.flatten()):
                df_metadata.loc[row_mask, column] = value

        elif key == ord('d'): # d : delete
            loop = False
            print("Not included in dataset", filename)
            # Drop only the row matching both the frame and the video number.
            df_metadata = df_metadata.drop(df_metadata.index[row_mask])

        elif key == ord('r'): # r : redraw
            print("Redrawing image", filename)
        else:
            print("Please press: a for accept, d for delete or r for redraw")

df_metadata.to_csv(df_metadata_filepath)

Accepted into dataset \frame_1_0.jpg
Not included in dataset \frame_1_2.jpg
Not included in dataset \frame_0_1.jpg
Not included in dataset \frame_1_1.jpg
Accepted into dataset \frame_0_2.jpg
Redrawing image \frame_0_0.jpg
Redrawing image \frame_0_0.jpg
Redrawing image \frame_0_0.jpg
Accepted into dataset \frame_0_0.jpg


In [29]:
"""Temp for displaying video"""

# image_frames_list = os.listdir(pose_dataset_path + r'\backup')
# image_frames_list = sorted(image_frames_list)
# indexes_list = []
# for frame_filename in image_frames_list:
#     temp_indexing = frame_filename[6:][:-4].split("_")
#     frame_number_index = int(temp_indexing[1])
#     indexes_list.append(frame_number_index)
# indexes_list.sort()
# filename_list = [pose_dataset_path + r'\backup' + r'\frame_' + str(0) + '_' +str(frame_number) + '.jpg' for frame_number in indexes_list]

# import os
# import moviepy.video.io.ImageSequenceClip
# image_folder='backup'
# fps=1

# image_files = [image_folder+'/'+img for img in os.listdir(image_folder) if img.endswith(".jpg")]
# clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(image_files, fps=fps)
# clip.write_videofile('my_video.mp4')