In [1]:
import os
from matplotlib import pyplot as plt
import mediapipe as mp
from tensorflow import keras
from keras.models import Model
from keras.layers import Dense
import pandas as pd
import cv2
import numpy as np
import math

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities
mp_pose = mp.solutions.pose
mp_drawing_styles = mp.solutions.drawing_styles
pose = mp_pose.Pose()

In [6]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

def calculate_angles(firstPoint, midPoint, lastPoint):
  # Same formula from the Android App for consistency
  result = math.degrees(math.atan2(lastPoint.y  - midPoint.y, lastPoint.x - midPoint.x) - math.atan2(firstPoint.y - midPoint.y, firstPoint.x - midPoint.x))
  result = abs(result) # Angle should never be negative
  # 
  if result > 180:
      result = 360.0 - result # Always get the acute representation of the angle
  return result


def get_standardized_angle(pose_angles):
  average = sum(pose_angles) / len(pose_angles)
  return round(average)

def extract_angles(results):
  if results.pose_landmarks is None:
      return None
  
  # Calculates the essential angles for each image and adds them to the array
  left_elbow_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_WRIST] , results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_ELBOW], results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER])
  right_elbow_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_WRIST], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_ELBOW], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER])
  left_shoulder_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_ELBOW], results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER], results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HIP])
  right_shoulder_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_ELBOW], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HIP])
  left_hip_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER], results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HIP], results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_KNEE])
  right_hip_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HIP], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_KNEE])
  left_knee_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HIP], results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_KNEE], results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HEEL])
  right_knee_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HIP], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_KNEE], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HEEL])

  golfdataset = pd.DataFrame([[left_elbow_angles, right_elbow_angles, left_shoulder_angles, right_shoulder_angles, left_hip_angles, left_knee_angles, left_knee_angles, right_knee_angles, 5]], columns=['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', "Label"])
  # golfdataset.to_csv('GolfMediaPipeData.csv', mode='a', index=False, header=False)


In [14]:
# Set up Mediapipe Pose
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# Set minimum confidence levels
min_detection_confidence = 0.5
min_tracking_confidence = 0.5

# Define the path to the directory containing the images
image_dir = './GolfDB_Dataset/Mid-Follow-Through/'

# Get a list of all the image filenames in the directory
image_filenames = [os.path.join(image_dir, filename) for filename in os.listdir(image_dir)
                   if os.path.isfile(os.path.join(image_dir, filename))]

# Sort the filenames in ascending order by their numeric value
image_filenames = sorted(image_filenames, key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))

ctr = 0
# Loop over each image filename
for filename in image_filenames:
    # Read in the image
    image = cv2.imread(filename)

    # Convert the image to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Detect the pose
    with mp_pose.Pose(min_detection_confidence=min_detection_confidence,
                      min_tracking_confidence=min_tracking_confidence) as pose:
        results = pose.process(image)

        # Check if pose_landmarks attribute is not found
        if results.pose_landmarks is None:
            print(f"Pose landmarks not found in {filename}")
        else:
            keypoints = extract_angles(results)
            ctr+=1

            # Get the height and width of the person in the image
            height = int((results.pose_landmarks.landmark[mp_pose.PoseLandmark.NOSE].y * image.shape[0]) -
                        (results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HIP].y * image.shape[0]))
            width = int((results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER].x * image.shape[1]) -
                        (results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER].x * image.shape[1]))

            # Calculate the aspect ratio and resize the image
            aspect_ratio = width / height
            if height > 160 or width > 160:
                if height > width:
                    new_height = 160
                    new_width = int(new_height * aspect_ratio)
                else:
                    new_width = 160
                    new_height = int(new_width / aspect_ratio)
                image = cv2.resize(image, (new_width, new_height))

            # Draw all pose landmarks on the image
            annotated_image = image.copy()
            mp_drawing.draw_landmarks(annotated_image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            # Display the annotated image
            cv2.imshow('Pose Detection', annotated_image)

        if ctr >= 450:
            break

    # if cv2.waitKey(0) & 0xFF == ord('e'):
    #     # cv2.destroyAllWindows()
    #     break

# Close the window
cv2.destroyAllWindows()

Pose landmarks not found in ./GolfDB_Dataset/Mid-Follow-Through/59.jpg
Pose landmarks not found in ./GolfDB_Dataset/Mid-Follow-Through/111.jpg
Pose landmarks not found in ./GolfDB_Dataset/Mid-Follow-Through/223.jpg
Pose landmarks not found in ./GolfDB_Dataset/Mid-Follow-Through/226.jpg
Pose landmarks not found in ./GolfDB_Dataset/Mid-Follow-Through/311.jpg
Pose landmarks not found in ./GolfDB_Dataset/Mid-Follow-Through/502.jpg
Pose landmarks not found in ./GolfDB_Dataset/Mid-Follow-Through/618.jpg
Pose landmarks not found in ./GolfDB_Dataset/Mid-Follow-Through/701.jpg
Pose landmarks not found in ./GolfDB_Dataset/Mid-Follow-Through/702.jpg
Pose landmarks not found in ./GolfDB_Dataset/Mid-Follow-Through/792.jpg
Pose landmarks not found in ./GolfDB_Dataset/Mid-Follow-Through/1200.jpg
Pose landmarks not found in ./GolfDB_Dataset/Mid-Follow-Through/1237.jpg
Pose landmarks not found in ./GolfDB_Dataset/Mid-Follow-Through/1238.jpg
Pose landmarks not found in ./GolfDB_Dataset/Mid-Follow-Throug

In [4]:
dataset = pd.read_csv("C:\\Users\\Crim\\Desktop\\RESEARCH_THESIS\\golfpose\\GolfMediaPipeData.csv")
dataset.head()

Unnamed: 0.1,Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,Label
0,173.38268,162.905673,1.682428,1.951471,157.318779,176.730082,176.730082,178.661229,1,
1,165.253038,156.930396,7.233925,1.591685,153.67172,173.217956,173.217956,177.346512,1,
2,161.581891,162.968698,3.53505,1.496108,169.982488,170.023857,170.023857,179.863322,1,
3,169.232114,162.796697,0.613883,0.804719,173.916213,176.045937,176.045937,177.358669,1,
4,173.758449,175.173183,3.802594,0.828725,162.272318,174.503925,174.503925,171.857373,1,


In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import learning_curve

# Load the dataset
df = pd.read_csv('GolfMediaPipeData.csv' )
X = df[['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8']]
X = X.values
print(X)
y = df[['Label']]
y = y.values

[[173.38268005 162.90567338   1.68242825 ... 176.73008236 176.73008236
  178.6612294 ]
 [165.25303778 156.93039561   7.23392467 ... 173.21795634 173.21795634
  177.34651214]
 [161.5818915  162.96869754   3.5350505  ... 170.02385739 170.02385739
  179.86332157]
 ...
 [177.53556858 153.62933603  11.0256545  ... 179.44972263 179.44972263
  157.96385748]
 [173.92439152 169.73621312  21.28184849 ... 172.93674048 172.93674048
  156.47654847]
 [151.75431044 147.30369408  34.05415923 ... 176.65252462 176.65252462
  179.43459679]]


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# initialize a StandardScaler object
scaler = StandardScaler()

# fit the scaler to the data and transform it
X_scaled = scaler.fit_transform(X)

# initialize an SVM model with linear kernel
clf = SVC(kernel='linear', C=1)

# fit the model to the scaled data
clf.fit(X_scaled, y)

# make predictions on the scaled data
y_pred = clf.predict(X_scaled)

print(y_pred)
accuracy = accuracy_score(y, y_pred)
print("Accuracy:", accuracy)

  y = column_or_1d(y, warn=True)


[1 1 1 ... 2 5 5]
Accuracy: 0.8026905829596412


In [12]:
# split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# initialize a StandardScaler object
scaler = StandardScaler()

# fit the scaler to the training data and transform it
X_train_scaled = scaler.fit_transform(X_train)

# transform the test data using the scaler fitted to the training data
X_test_scaled = scaler.transform(X_test)

# initialize an SVM model with linear kernel
clf = SVC(kernel='linear', C=1, probability=True)

# fit the model to the scaled training data
clf.fit(X_train_scaled, y_train)

# make predictions on the scaled test data
y_pred = clf.predict(X_test_scaled)

print(y_pred)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

  y = column_or_1d(y, warn=True)


[2 4 2 2 2 5 1 2 3 3 1 5 1 1 5 5 5 2 2 1 4 1 4 2 3 1 4 1 3 4 2 3 5 5 2 4 3
 3 4 2 4 1 1 2 5 3 2 5 4 5 2 4 2 3 3 3 4 4 2 4 1 5 4 3 1 1 2 3 1 4 5 2 3 5
 5 4 4 3 5 5 5 1 3 1 2 4 4 3 1 5 1 5 5 4 1 2 1 4 2 2 4 4 3 2 2 4 5 1 2 4 4
 4 2 1 5 1 4 1 1 5 1 1 2 2 5 5 1 4 3 4 4 5 1 2 5 5 4 3 5 5 1 1 3 2 1 2 2 1
 4 3 3 2 5 3 1 2 3 1 4 4 4 2 4 2 3 3 2 2 3 3 1 1 1 1 2 2 3 2 4 4 2 4 3 1 5
 5 4 3 1 1 1 2 1 3 2 1 3 5 1 4 4 2 1 5 2 1 5 4 2 5 4 1 4 1 2 1 3 2 4 1 2 3
 2 4 3 5 1 5 4 1 3 5 3 5 2 2 2 5 2 5 5 3 2 4 5 5 4 2 1 3 1 4 4 2 2 3 4 2 2
 2 3 5 3 1 1 4 3 3 4 4 5 5 3 4 5 4 4 1 4 5 1 2 4 4 5 4 3 2 1 3 4 4 4 4 4 2
 3 1 4 3 4 1 2 3 4 4 1 3 3 1 2 2 3 5 3 4 2 3 3 4 3 5 5 5 2 5 5 3 5 2 4 2 3
 1 1 2 3 1 1 5 2 5 2 4 1 5 2 4 2 1 5 5 5 5 1 3 5 5 1 2 1 4 5 1 4 2 3 5 5 3
 4 3 5 4 5 3 5 3 5 4 2 4 4 1 4 2 4 4 2 1 2 5 2 5 1 2 4 1 2 3 5 1 4 2 5 2 4
 2 2 2 4 1 1 1 5 1 3 4 1 1 1 4 2 2 1 4 4 4 1 3 3 5 1 3 2 5 4 3 4 3 4 2 3 3
 3 1]
Accuracy: 0.7892376681614349


In [13]:
# obtain the decision function values for the test data
decision_values = clf.decision_function(X_test_scaled)

# find the index of the highest decision function value for each test sample
max_indices = decision_values.argmax(axis=1)

# get the corresponding class labels for the highest decision function values
max_labels = clf.classes_[max_indices]

# print the highest confidence level for each label
for i in range(5):
    max_confidence = decision_values[:,i][max_indices == i].max()
    print("Highest confidence for label", i, ":", max_confidence)

Highest confidence for label 0 : 4.312710191332661
Highest confidence for label 1 : 4.3096315442480675
Highest confidence for label 2 : 4.314458375911905
Highest confidence for label 3 : 4.317306988215166
Highest confidence for label 4 : 4.318833734344646


In [15]:
train_sizes = np.linspace(0.1, 1.0, 20)
train_sizes, train_scores, test_scores = learning_curve(
    clf, X, y, train_sizes=train_sizes, cv=5)

# calculate the mean and standard deviation of the training and validation scores
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)

# plot the learning curve
plt.plot(train_sizes, train_mean, label='Training score')
plt.plot(train_sizes, test_mean, label='Cross-validation score')
plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, alpha=0.1)
plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, alpha=0.1)
plt.legend(loc='best')
plt.xlabel('Number of training samples')
plt.ylabel('Accuracy')
plt.title('Learning curve for SVM with RBF kernel')
plt.show()

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


KeyboardInterrupt: 

In [24]:
def extract_angles_test(results):
  if results.pose_landmarks is None:
      return None
  
  # Calculates the essential angles for each image and adds them to the array
  left_elbow_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_WRIST] , results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_ELBOW], results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER])
  right_elbow_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_WRIST], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_ELBOW], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER])
  left_shoulder_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_ELBOW], results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER], results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HIP])
  right_shoulder_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_ELBOW], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HIP])
  left_hip_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER], results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HIP], results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_KNEE])
  right_hip_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HIP], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_KNEE])
  left_knee_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HIP], results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_KNEE], results.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HEEL])
  right_knee_angles = calculate_angles(results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HIP], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_KNEE], results.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HEEL])

  return [[left_elbow_angles, right_elbow_angles, left_shoulder_angles, right_shoulder_angles, left_hip_angles, right_hip_angles, left_knee_angles, right_knee_angles]]

In [25]:
import cv2
import mediapipe as mp
import numpy as np

# Set up Mediapipe Pose
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# Set minimum confidence levels
min_detection_confidence = 0.5
min_tracking_confidence = 0.5

# Define the path to the video file
video_file = './8.mp4'

# Open the video file
cap = cv2.VideoCapture(video_file)

# Initialize dictionary to store highest confidence level frame for each predicted label
highest_conf_frames = {}

# Loop over each frame of the video
while cap.isOpened():
    # Read in the frame
    ret, frame = cap.read()

    if not ret:
        break

    # Convert the frame to RGB
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Detect the pose
    with mp_pose.Pose(min_detection_confidence=min_detection_confidence,
                      min_tracking_confidence=min_tracking_confidence) as pose:
        results = pose.process(frame)
        test_value = extract_angles_test(results)
        print(test_value)
        test_scaled_value = scaler.transform(test_value)
        test_probabilities = clf.predict_proba(test_scaled_value)
        test_prediction = np.argmax(test_probabilities)
        test_confidence = test_probabilities[0][test_prediction] * 100
        print(f"Predicted label: {test_prediction}, confidence: {test_confidence:.2f}%")

        # Check if pose_landmarks attribute is not found or does not contain all expected landmarks
        if not results.pose_landmarks or len(results.pose_landmarks.landmark) != 33:
            continue

        # Draw the pose landmarks on the image
        annotated_image = frame.copy()

        # Check if current frame has highest confidence level for predicted label
        if test_prediction not in highest_conf_frames or test_confidence > highest_conf_frames[test_prediction][0]:
            # Update dictionary with current frame as new highest confidence level frame
            highest_conf_frames[test_prediction] = (test_confidence, annotated_image)

    # Display the annotated image
    cv2.imshow('Pose Detection', annotated_image)

    # Exit the loop when the 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the video capture and close the window
cap.release()
cv2.destroyAllWindows()

# Display the highest confidence level frames for each predicted label
for label, (confidence, frame) in highest_conf_frames.items():
    print(f"Label {label} has highest confidence level of {confidence:.2f}%")
    cv2.imshow(f"Label {label}", frame)
    cv2.waitKey(0)
cap.release()
cv2.destroyAllWindows()

[[171.82167871009224, 168.20648008727824, 6.981515830825712, 2.32970142952243, 157.42764786824966, 159.71238191553053, 174.02851029840014, 176.67957640486705]]
Predicted label: 0, confidence: 90.02%
[[168.22566303066594, 167.35876478613838, 7.849576095702405, 2.5038878448281814, 156.17630810795367, 160.4210417712123, 171.2040209569182, 176.17690388990206]]
Predicted label: 0, confidence: 90.96%
[[169.48910272832177, 168.13606641527429, 7.628018878527785, 3.117241423323473, 157.04535897816393, 159.64614214159784, 173.9238513986152, 176.92269886587647]]
Predicted label: 0, confidence: 90.17%
[[171.180769363857, 168.18164015810535, 7.065491704723987, 3.4129377802438166, 156.7452967749249, 160.7191082841294, 172.68788110151206, 173.62466031187893]]
Predicted label: 0, confidence: 89.13%
[[170.60110828069406, 168.7416955136977, 7.5051705119166, 5.3661523879664115, 155.494537853877, 160.19551076386065, 172.88861515101735, 176.35850607299798]]
Predicted label: 0, confidence: 91.28%
[[168.6310