### Pose estimation evaluation metrics

These metrics are used to gauge how accurately YOLOv8 and Detectron2 extract the pose of the patient.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
import random
import numpy as np
import re
import tkinter
from tkinter import filedialog

#### Select random frames from a given patient and convert to individual images

In [None]:
# directory where input videos are stored
video_dir = "../Videos/Cropped/OAW06/"
# directory the extracted frames are written to
image_dir = "temp_images"
# number of random frames to extract from each video
frames_per_video = 3

# cv2.imwrite does not create missing folders; it just reports failure
os.makedirs(image_dir, exist_ok=True)

# loop through all files in the video directory
for video_name in os.listdir(video_dir):
    if not video_name.endswith(".mp4"):
        continue

    # construct the full video path
    video_path = os.path.join(video_dir, video_name)
    print(f"Video {video_name}:")

    cap = cv2.VideoCapture(video_path)
    try:
        # the frame count is reported as a float; clamp at 0 in case the video could not be opened
        frame_count = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0)

        # random.sample raises ValueError when asked for more frames than the video has
        frame_i = random.sample(range(frame_count), min(frames_per_video, frame_count))

        for i in frame_i:
            # set the current frame position
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            success, image = cap.read()
            if not success:
                print(f"Frame {i} could not be read.")
                continue

            # save frame as a jpg image
            image_path = os.path.join(image_dir, video_name[:-4] + f"-frame{i}.jpg")
            if cv2.imwrite(image_path, image):
                print(f"Frame {i} converted to image.")
            else:
                print(f"Frame {i} could not be written to {image_path}.")
    finally:
        # always free the video handle, even if reading a frame fails
        cap.release()

#### Or instead, select N random frames from a single video

In [None]:
tkinter.Tk().withdraw() # prevents an empty tkinter window from appearing
video_path = filedialog.askopenfilename(title="input")

n = 10 # number of frames to select per video
image_dir = "temp_images" # folder the extracted frames are written to

if not video_path:
    # nothing was chosen in the file dialog (e.g. it was cancelled)
    print("No video selected.")
else:
    video_name = os.path.basename(video_path)
    # file name without its extension, whatever the extension length is
    video_stem = os.path.splitext(video_name)[0]

    # cv2.imwrite does not create missing folders; it just reports failure
    os.makedirs(image_dir, exist_ok=True)

    # converting random frames from the video to images
    cap = cv2.VideoCapture(video_path)
    try:
        # the frame count is reported as a float; clamp at 0 in case the video could not be opened
        frame_count = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0)

        # randomly select n frames (fewer if the video is shorter than n frames)
        frame_i = random.sample(range(frame_count), min(n, frame_count))

        for i in frame_i:
            # set the current frame position
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            success, image = cap.read()
            if not success:
                print(f"Frame {i} could not be read.")
                continue

            # save frame as a jpg image
            image_path = os.path.join(image_dir, video_stem + f"-frame{i}.jpg")
            if cv2.imwrite(image_path, image):
                print(f"Frame {i} converted to image.")
            else:
                print(f"Frame {i} could not be written to {image_path}.")
    finally:
        # always free the video handle, even if reading a frame fails
        cap.release()

## Keypoint Annotation Tool

In [None]:
# sections of this code have been created with the help of https://www.geeksforgeeks.org/displaying-the-coordinates-of-the-points-clicked-on-the-image-using-python-opencv/

# COCO index of each body keypoint I will be collecting, listed in annotation order
kp_coco_idxs = dict(
    RAnkle=16,
    LAnkle=15,
    RKnee=14,
    LKnee=13,
    RHip=12,
    LHip=11,
    RShoulder=6,
)

# keypoint names in the order the annotator is prompted for them
# (dicts keep insertion order, so this matches the mapping above)
keypoints = list(kp_coco_idxs)

# mouse callback: records the clicked position for the keypoint currently being prompted
def click_event(event, x, y, flags, params):
    """Store a left click as the location of the current keypoint and mark it on the image.

    Reads and updates the module-level `current_kp_index`, `clicks` and `img`.
    `flags` and `params` are part of the callback signature and are not used.
    """
    global current_kp_index

    # anything other than a left-button press is ignored
    if event != cv2.EVENT_LBUTTONDOWN:
        return

    kp_name = keypoints[current_kp_index]
    clicks[f"{kp_name}_x"] = x
    clicks[f"{kp_name}_y"] = y

    # echo the coordinates on the shell
    print(x, ' ', y)

    font = cv2.FONT_HERSHEY_SIMPLEX

    # label the click with the keypoint's COCO index and mark it with a filled circle
    cv2.putText(img, str(kp_coco_idxs[kp_name]), (x, y), font, 1, (255, 0, 0), 2)
    cv2.circle(img, (x, y), 5, (0, 255, 0), -1)

    more_to_collect = current_kp_index < len(keypoints) - 1
    if not more_to_collect:
        # NOTE(review): the index is not advanced here, so another click after this
        # message overwrites the last keypoint's coordinates.
        cv2.putText(img, "All points collected.", (10, 30), font, 0.7, (0, 255, 0), 2)
        cv2.imshow('image', img)
        return

    # move on and prompt for the next keypoint
    current_kp_index += 1
    display_prompt(img)


def display_prompt(image):
    """Show `image` in the 'image' window with the name of the current keypoint drawn on it.

    The text is drawn on a copy, so `image` itself is left untouched.
    """
    annotated = image.copy()
    cv2.putText(
        annotated,
        keypoints[current_kp_index],
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        1,
        (255, 0, 0),
        2,
    )
    cv2.imshow('image', annotated)


def extract_frame_num(im_name):
    """Return the frame number embedded in an image name such as 'clip-frame12.jpg'.

    The number is returned as a string (e.g. '12'), exactly as it appears in the name.
    Accepts the same extensions the annotation loop accepts: .jpg, .jpeg and .png.

    Raises:
        ValueError: if `im_name` does not end in 'frame<digits>.<jpg|jpeg|png>'.
    """
    # digits between 'frame' and the image extension at the end of the name
    p = r'frame(\d+)\.(?:jpg|jpeg|png)$'

    match = re.search(p, im_name)
    if match is None:
        raise ValueError(f"Cannot find a frame number in image name: {im_name!r}")
    return match.group(1)


def extract_file_name(im_name):
    """Return the video file name an extracted frame image came from.

    'clip-frame12.jpg' -> 'clip.mp4'. Accepts the same extensions the annotation
    loop accepts: .jpg, .jpeg and .png.

    Raises:
        ValueError: if `im_name` does not look like '<video>-frame<digits>.<jpg|jpeg|png>'.
    """
    # everything before the '-frame<digits>.<ext>' suffix
    p = r'^(.*)-frame\d+\.(?:jpg|jpeg|png)$'

    match = re.search(p, im_name)
    if match is None:
        raise ValueError(f"Cannot find a video name in image name: {im_name!r}")
    return match.group(1) + ".mp4"


if __name__=="__main__":

    img_directory = 'temp_images'
    # one dict of annotations per image; turned into a DataFrame once at the end
    records = []
    # name of the most recently annotated image, used to name the output file
    last_annotated = None

    for img_name in os.listdir(img_directory):
        if not img_name.endswith(('.png', '.jpg', '.jpeg')):
            continue

        img_path = os.path.join(img_directory, img_name)

        # Reading the image
        img = cv2.imread(img_path, 1)
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot be decoded
            print(f"Could not read {img_path}; skipping.")
            continue

        # initialize the current keypoint index and clicks dictionary
        current_kp_index = 0

        clicks = {f"{name}_x": 0 for name in keypoints}
        clicks.update({f"{name}_y": 0 for name in keypoints})
        clicks["file_name"] = extract_file_name(img_name)
        clicks["frame"] = extract_frame_num(img_name)

        # display the initial prompt
        display_prompt(img)

        # setting mouse handler for the image and calling the click_event() function
        cv2.setMouseCallback("image", click_event)

        # wait for a key to be pressed to exit
        key = cv2.waitKey(0)
        if key == ord("2"):
            # halve the image when the original is too big to view on the monitor;
            # the size is taken from the image itself so the x2 rescale below holds for any resolution
            height, width = img.shape[:2]
            img = cv2.resize(img, (width // 2, height // 2))
            display_prompt(img)
            cv2.waitKey(0)

            # multiplying all clicked coordinates by 2 to account for the resizing prior to annotation.
            # "file_name" and "frame" are strings and are left untouched.
            # NOTE(review): clicks made before pressing "2" are doubled as well.
            clicks = {name: (value * 2 if isinstance(value, int) else value) for (name, value) in clicks.items()}

        cv2.destroyAllWindows()

        records.append(clicks)
        last_annotated = img_name

    output = pd.DataFrame(records)

    if last_annotated is None:
        print(f"No images were annotated in {img_directory}; nothing written.")
    else:
        # the output file is named after the first 5 characters of the last annotated image
        output.to_csv(f"../Keypoint-Annotations/{last_annotated[:5]}.csv")


### PDJ - Percentage of Detected Joints 

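A keypoint counts as detected when the distance between the true (annotated) keypoint and the predicted keypoint is less than 0.2 × the torso diameter, where the torso diameter is the diagonal from the right shoulder to the left hip. The PDJ of a keypoint is the percentage of annotated frames in which it is detected.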

In [None]:
import tkinter
from tkinter import filedialog
from scipy.spatial import distance

# keypoints that are scored with PDJ (RShoulder is not in this list;
# calculate_pdj_thold reads its coordinates for the torso diameter)
keypoints = ["RAnkle", "LAnkle", "RKnee", "LKnee", "RHip", "LHip"]

def calculate_pdj_thold(row, fraction=0.2):
    """Return the PDJ distance threshold for one annotated frame.

    The threshold is `fraction` times the torso diameter, taken as the distance
    from the right shoulder to the left hip.

    Args:
        row: mapping/Series with RShoulder_x, RShoulder_y, LHip_x and LHip_y.
        fraction: share of the torso diameter used as the threshold (default 0.2).

    Returns:
        The threshold, in the same units as the coordinates.
    """
    # a row that also holds strings (e.g. file_name) has object dtype, so cast the
    # coordinates to float before handing them to scipy
    shoulder = row[["RShoulder_x", "RShoulder_y"]].astype(float)
    hip = row[["LHip_x", "LHip_y"]].astype(float)

    torso_diameter = distance.euclidean(shoulder, hip)
    return torso_diameter * fraction

# parsed prediction CSVs keyed by path, so each export is read from disk once instead of
# once per annotated row; re-running this cell creates a fresh cache
_prediction_cache = {}


def find_prediction(row, model_type):
    """Return the model's prediction row for the frame described by `row`.

    Args:
        row: mapping/Series with 'file_name' (video name ending in a 4-character
            extension such as '.mp4') and 'frame' (frame number, int or numeric string).
        model_type: "detectron" or "yolo"; selects the export folder.

    Returns:
        The row of the export CSV whose index label equals the frame number.
        presumably the export has one row per frame, in frame order — TODO confirm.

    Raises:
        ValueError: if `model_type` is not "detectron" or "yolo".
    """
    export_dirs = {"detectron": "detectron_exports", "yolo": "yolo_exports"}
    if model_type not in export_dirs:
        raise ValueError("Invalid model type.")

    pred_path = f"{export_dirs[model_type]}/{row['file_name'][:-4]}.csv"
    if pred_path not in _prediction_cache:
        _prediction_cache[pred_path] = pd.read_csv(pred_path)

    return _prediction_cache[pred_path].loc[int(row["frame"])]


def calculate_kp_distances(row, kp):
    """Return the Euclidean distance between the labelled and predicted location of `kp`.

    Labelled coordinates are read from '<kp>_x' / '<kp>_y', predicted ones from
    '<kp>_x_p' / '<kp>_y_p'.
    """
    labelled_xy = row[[f"{kp}_x", f"{kp}_y"]]
    predicted_xy = row[[f"{kp}_x_p", f"{kp}_y_p"]]
    return distance.euclidean(labelled_xy, predicted_xy)


def calculate_kp_detected(row, kp):
    """Return 1 if the '<kp>_distance' of `row` is strictly below its 'PDJ_thold', else 0."""
    within_threshold = row[f"{kp}_distance"] < row["PDJ_thold"]
    return int(bool(within_threshold))


def _score_keypoints(df, kp_names, out_path):
    """Add '<kp>_distance' / '<kp>_detected' columns to `df`, print each PDJ, then save `df`.

    `df` is modified in place and written to `out_path` once, after all keypoints are scored.
    """
    for kp in kp_names:
        df[f"{kp}_distance"] = df.apply(lambda x: calculate_kp_distances(x, kp), axis=1)
        df[f"{kp}_detected"] = df.apply(lambda x: calculate_kp_detected(x, kp), axis=1)

        # PDJ = share of frames in which the keypoint was detected, as a percentage
        n_detected = int((df[f"{kp}_detected"] == 1).sum())
        print(f'The PDJ for the {kp} is {n_detected / len(df) * 100}')

    df.to_csv(out_path, index=False)


if __name__=="__main__":
    # load annotation file
    tkinter.Tk().withdraw() # prevents an empty tkinter window from appearing
    pose_labels_path = filedialog.askopenfilename(title="Select label file")
    if not pose_labels_path:
        # nothing was chosen in the file dialog (e.g. it was cancelled)
        raise FileNotFoundError("No label file selected.")
    labels_df = pd.read_csv(pose_labels_path, index_col=0)

    # create a df of the predictions that correspond to each row in the annotation df;
    # the '_p' suffix distinguishes prediction columns from annotation columns
    # For YOLO
    pred_df_yolo = labels_df.apply(lambda row: find_prediction(row, model_type="yolo"), axis=1)
    pred_df_yolo = pred_df_yolo.rename(columns=lambda x: x + '_p')

    # For Detectron
    pred_df_detectron = labels_df.apply(lambda row: find_prediction(row, model_type="detectron"), axis=1)
    pred_df_detectron = pred_df_detectron.rename(columns=lambda x: x + '_p')

    # add the prediction and label dfs together
    df_yolo = pd.concat([labels_df, pred_df_yolo], axis=1)
    df_detectron = pd.concat([labels_df, pred_df_detectron], axis=1)

    # calculate the PDJ threshold for each frame in the annotation file
    # (it depends on the labels only, so it is computed once and shared by both models)
    pdj_thold = labels_df.apply(calculate_pdj_thold, axis=1)
    df_yolo["PDJ_thold"] = pdj_thold
    df_detectron["PDJ_thold"] = pdj_thold

    # calculate the distance between the predicted and true kp location
    print("YOLO:")
    _score_keypoints(df_yolo, keypoints, "yolo_PDJ.csv")

    print("Detectron:")
    _score_keypoints(df_detectron, keypoints, "detectron_PDJ.csv")

    print("complete")

#### Check the performance if you swap left and right keypoints

In [None]:
def swap_L_R_columns(df):
    """Return a copy of `df` in which left and right prediction columns trade names.

    Only columns whose name ends in '_p' are touched: a leading 'L' becomes 'R' and a
    leading 'R' becomes 'L'. Every other column, including '_p' columns that start with
    neither letter, keeps its name.

    NOTE(review): any '_p' column starting with 'L' or 'R' is swapped, whether or not
    the letter stands for left/right — confirm against the export's column names.
    """
    def swap_first_letter(col_name):
        # leave non-string labels and non-prediction columns alone
        if not isinstance(col_name, str) or not col_name.endswith('_p'):
            return col_name
        if col_name.startswith('L'):
            return 'R' + col_name[1:]
        if col_name.startswith('R'):
            return 'L' + col_name[1:]
        # without this fall-through the column would be renamed to None
        return col_name

    # apply function to all column names
    return df.rename(columns=swap_first_letter)

# PDJ results saved by the evaluation cell. The frames below are named and reported as
# YOLO throughout the rest of the notebook, so the YOLO results are loaded here;
# point this at "detectron_PDJ.csv" to analyse Detectron instead.
pdj_results_path = "yolo_PDJ.csv"

df_yolo = pd.read_csv(pdj_results_path)
# same data with the left/right prediction columns exchanged (rename returns a new frame)
df_yolo_inverse = swap_L_R_columns(df_yolo.copy())

for kp in keypoints:
    # calculate distances and detection for inverse DataFrame
    df_yolo_inverse[f"{kp}_distance"] = df_yolo_inverse.apply(lambda x: calculate_kp_distances(x, kp), axis=1)
    df_yolo_inverse[f"{kp}_detected"] = df_yolo_inverse.apply(lambda x: calculate_kp_detected(x, kp), axis=1)

    # calculate the percentage of rows where inverse detection is 1 and original detection is 0
    condition = (df_yolo_inverse[f"{kp}_detected"] == 1) & (df_yolo[f"{kp}_detected"] == 0)
    percentage = int(condition.sum()) / len(df_yolo_inverse) * 100

    print(f'The percentage of {kp} detected as 1 in df_yolo_inverse and 0 in df_yolo is {percentage:.2f}%')

    

#### Calculate the range of confidence values per group (detected/not detected based on PDJ)

In [197]:
# column headers of the summary table, in output order
metric_columns = [
    'Keypoint',
    'Min Conf (Failed PDJ)',
    'Max Conf (Failed PDJ)',
    'Min Conf (Passed PDJ)',
    'Max Conf (Passed PDJ)',
]
metrics = {column: [] for column in metric_columns}

for keypoint_name in keypoints:
    conf_col = f'{keypoint_name}_conf_p'
    detected_col = f'{keypoint_name}_detected'

    df_yolo[f'{keypoint_name}_normalized_distance'] = df_yolo[f'{keypoint_name}_distance'] / df_yolo['PDJ_thold']

    # "blue" rows passed the PDJ check either as predicted or with left/right swapped
    blue_mask = (df_yolo[detected_col] == 1) | (df_yolo_inverse[detected_col] == 1)
    blue_points = df_yolo[blue_mask]
    red_points = df_yolo[~blue_mask]

    # one value per summary column, in the same order as metric_columns
    summary_row = (
        keypoint_name,
        red_points[conf_col].min(),
        red_points[conf_col].max(),
        blue_points[conf_col].min(),
        blue_points[conf_col].max(),
    )
    for column, value in zip(metric_columns, summary_row):
        metrics[column].append(value)


metrics_df = pd.DataFrame(metrics)

metrics_df.to_csv("yolo_pdj_metrics.csv")


#### Calculate optimal confidence thresholds

In [None]:
optimal_thresholds = {}

for keypoint_name in keypoints:
    conf_col = f'{keypoint_name}_conf_p'
    detected_col = f'{keypoint_name}_detected'

    # "blue" rows passed the PDJ check (as predicted or with left/right swapped); "red" rows failed
    blue_mask = (df_yolo[detected_col] == 1) | (df_yolo_inverse[detected_col] == 1)
    blue_conf = df_yolo.loc[blue_mask, conf_col]
    red_conf = df_yolo.loc[~blue_mask, conf_col]

    blue_valid = blue_conf.dropna()
    if blue_valid.empty:
        # a percentile of no data is undefined, so there is no threshold to report
        print(f"Warning: no confidence scores passed the PDJ check for {keypoint_name}. Skipping.")
        continue

    # the 10th percentile of the blue confidences is the value that roughly 90% of blue points reach
    threshold_90 = np.percentile(blue_valid, 10)

    best_threshold = threshold_90
    best_blue_above = (blue_conf >= threshold_90).mean()  # fraction of blue points at or above the threshold
    best_red_below = (red_conf < threshold_90).mean()  # fraction of red points below the threshold

    # if clearly fewer than 90% of blue points reach the threshold, retry with the 9.9th
    # percentile, which is a slightly LOWER threshold and so can only keep more blue points
    if not np.isclose(best_blue_above, 0.9, atol=0.001) and best_blue_above < 0.9:
        print(f"Warning: Initial threshold_90 results in {best_blue_above * 100:.2f}% blue points above threshold for {keypoint_name}. Adjusting threshold.")

        threshold_90 = np.percentile(blue_valid, 9.9)
        best_threshold = threshold_90
        best_blue_above = (blue_conf >= threshold_90).mean()
        best_red_below = (red_conf < threshold_90).mean()

    # No search over lower thresholds is needed: lowering the threshold can never put
    # more red points below it, so threshold_90 already maximises the red points
    # rejected among all thresholds from threshold_90 down to the minimum blue confidence.

    # store the best threshold for the current keypoint
    optimal_thresholds[keypoint_name] = best_threshold

    # print the best threshold found for this keypoint
    print(f"Optimal threshold for {keypoint_name}: {best_threshold:.4f} | % of Blue Points Above: {best_blue_above * 100:.2f}% | % of Red Points Below: {best_red_below * 100:.2f}%\n")

# Output the optimal thresholds for each keypoint
print("\nOptimal thresholds for all keypoints:")
for keypoint_name, threshold in optimal_thresholds.items():
    print(f"{keypoint_name}: {threshold:.4f}")


#### Plot data of a single keypoint

In [None]:
keypoint_name = "LHip"
conf_col = f'{keypoint_name}_conf_p'
norm_dist_col = f'{keypoint_name}_normalized_distance'

# calculate normalized distance (distance / PDJ threshold)
df_yolo[norm_dist_col] = df_yolo[f'{keypoint_name}_distance'] / df_yolo['PDJ_thold']

# create a mask for blue points: passed the PDJ check as predicted or with left/right swapped
blue_mask = (df_yolo[f'{keypoint_name}_detected'] == 1) | (df_yolo_inverse[f'{keypoint_name}_detected'] == 1)

# separate data based on the mask
blue_points = df_yolo[blue_mask]
red_points = df_yolo[~blue_mask]

# the column is looked up from keypoint_name so the message stays right when the keypoint is changed
print(f"Minimum {keypoint_name} confidence score of points who failed PDJ threshold: {red_points[conf_col].min()}")

# Create scatter plot
plt.figure(figsize=(8, 6))

# plot detected points in blue
plt.scatter(blue_points[conf_col], blue_points[norm_dist_col],
            color='blue', alpha=0.7, label='Within PDJ Threshold')
# plot not detected points in red
plt.scatter(red_points[conf_col], red_points[norm_dist_col],
            color='red', alpha=0.7, label='Outside PDJ Threshold')

# add titles and legend
plt.xlabel(f'{keypoint_name} Estimation Confidence Score')
plt.ylabel('Normalized Distance from True Label')
# the keypoint name is the title that was actually shown; an earlier, longer title call was always overwritten
plt.title(keypoint_name, fontsize=18)
plt.legend()
plt.grid(True)

plt.show()

#### Plot combined keypoint data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

keypoints = ["RAnkle", "LAnkle", "RKnee", "LKnee", "RHip", "LHip"]

# one frame per keypoint, concatenated once after the loop; this keeps 'Detected' a
# boolean column, which the ~ mask below relies on
kp_frames = []

for kp in keypoints:
    # calculate normalized distance for each keypoint
    df_yolo[f'{kp}_normalized_distance'] = df_yolo[f'{kp}_distance'] / df_yolo['PDJ_thold']

    # extract relevant columns; a point counts as detected if it passed the PDJ check
    # as predicted or with left/right swapped
    kp_frames.append(pd.DataFrame({
        'Confidence': df_yolo[f'{kp}_conf_p'],
        'Normalized Distance': df_yolo[f'{kp}_normalized_distance'],
        'Detected': (df_yolo[f'{kp}_detected'] == 1) | (df_yolo_inverse[f'{kp}_detected'] == 1)
    }))

combined_df = pd.concat(kp_frames, ignore_index=True)

# separate data based on detection status
detected_points = combined_df[combined_df['Detected']]
not_detected_points = combined_df[~combined_df['Detected']]

# create scatter plot
plt.figure()

# plot detected points in blue
plt.scatter(detected_points['Confidence'], detected_points['Normalized Distance'],
            color='blue', alpha=0.7, label='Within PDJ Threshold')

# plot not detected points in red
plt.scatter(not_detected_points['Confidence'], not_detected_points['Normalized Distance'],
            color='red', alpha=0.7, label='Outside PDJ Threshold')

# add titles and legend
plt.xlabel('Keypoint Estimation Confidence Score')
plt.ylabel('Normalized Distance From True Label')
plt.title('YOLOv8')
plt.legend()
# NOTE(review): fixed upper y-limit; points with a normalized distance above 61 are not shown
plt.ylim(top=61)
plt.grid(True)


# this mean covers only the points outside the PDJ threshold, so the label says so
print("Mean normalized distance of points outside the PDJ threshold: " + str(not_detected_points['Normalized Distance'].mean()))

plt.show()
