for input video - left hand & right leg

In [3]:
import cv2
import numpy as np
import mediapipe as mp

mp_pose = mp.solutions.pose

def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two items of each argument are read (x at index 0,
    y at index 1), so longer tuples such as ``(x, y, landmark_index)``
    are accepted as well.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)

# ---------------------------------------------------------------------------
# Score a recorded video against a reference pose (left hand & right leg).
#
# Every frame's pose landmarks are compared with the reference landmarks after
# aligning both on landmark 0.  Each frame is labelled Correct / Partially
# Correct / Incorrect and the labels are folded into a 0-100 score.
# ---------------------------------------------------------------------------

# --- Reference pose ---------------------------------------------------------
reference_image_path = r'dataset/amit_n_reference.jpg'
reference_image = cv2.imread(reference_image_path)
if reference_image is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError(f"Could not read reference image: {reference_image_path}")

# Maximum distance between a frame keypoint and the aligned reference keypoint.
# landmark.x / landmark.y are scaled by the frame size when drawn below, so the
# threshold is expressed in the same normalised units.
threshold = 0.1
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3, model_complexity=2)

reference_results = pose.process(cv2.cvtColor(reference_image, cv2.COLOR_BGR2RGB))

reference_landmarks = []
if reference_results.pose_landmarks:
    for i, landmark in enumerate(reference_results.pose_landmarks.landmark):
        reference_landmarks.append((landmark.x, landmark.y, i))
if not reference_landmarks:
    # Without reference landmarks there is nothing to compare frames against.
    pose.close()
    raise RuntimeError(f"No pose detected in reference image: {reference_image_path}")

# --- Input video ------------------------------------------------------------
input_video_path = r'dataset/ntest.mp4'
# Alternative sources: r'dataset/test.mp4', or cv2.VideoCapture(0) for a webcam.
input_video = cv2.VideoCapture(input_video_path)

# Internal condition name -> text shown on the frame and used for scoring.
keypoint_labels = {
    "Perfect": "Correct",
    "HandsNotAtRightPosition": "Partially Correct",
    "HandsNotAt90": "Partially Correct",
    "LegNotAbove": "Partially Correct",
    "Idle": "Incorrect",
    "LegNotStraight": "Incorrect"
}

# Frame count reported by the container; it stays the score denominator even
# when the run is stopped early with "q".
total_frames = int(input_video.get(cv2.CAP_PROP_FRAME_COUNT))
frame_count = 0  # frames actually processed

correct_frames = 0
partially_correct_frames = 0
incorrect_frames = 0

try:
    while True:
        ret, frame = input_video.read()
        if not ret:
            break

        frame_results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        frame_landmarks = []
        if frame_results.pose_landmarks:
            for i, landmark in enumerate(frame_results.pose_landmarks.landmark):
                frame_landmarks.append((landmark.x, landmark.y, i))

        frame_copy = frame.copy()
        img_h, img_w = frame.shape[:2]

        leg_not_above = False
        hands_not_at_right_position = False
        hands_not_at_90 = False
        leg_not_straight = False

        if frame_landmarks:
            # Translation that maps the frame's landmark 0 onto the reference's landmark 0.
            initial_offset = np.array(reference_landmarks[0][:2]) - np.array(frame_landmarks[0][:2])

        for frame_landmark in frame_landmarks:
            frame_point = frame_landmark[:2]
            frame_keypoint_number = frame_landmark[2]

            reference_point = reference_landmarks[frame_keypoint_number]
            adjusted_reference_point = (reference_point[0] - initial_offset[0],
                                        reference_point[1] - initial_offset[1])
            distance = calculate_distance(adjusted_reference_point, frame_point)

            center = (int(frame_point[0] * img_w), int(frame_point[1] * img_h))
            if distance < threshold:
                cv2.circle(frame_copy, center, 5, (0, 255, 0), -1)  # green: within threshold
                continue

            # Landmark indices follow the MediaPipe Pose landmark numbering.
            if frame_keypoint_number in [26, 28, 30, 32]:  # right knee / ankle / heel / foot index
                leg_not_above = True
            if frame_keypoint_number == 25:  # left knee
                leg_not_straight = True
            if frame_keypoint_number == 13:  # left elbow
                hands_not_at_90 = True
            if frame_keypoint_number in [15, 17, 19, 21]:  # left wrist / pinky / index / thumb
                hands_not_at_right_position = True
            cv2.circle(frame_copy, center, 5, (0, 0, 255), -1)  # red: outside threshold

        if not frame_landmarks:
            # No person detected: previously every flag stayed False and the
            # frame was scored as "Correct".  Treat it as an incorrect frame.
            label = keypoint_labels["Idle"]
        elif leg_not_above:
            if hands_not_at_90:
                label = keypoint_labels["Idle"]
            else:
                label = keypoint_labels["LegNotAbove"]
        elif leg_not_straight:
            label = keypoint_labels["LegNotStraight"]
        elif hands_not_at_right_position:
            if hands_not_at_90:
                label = keypoint_labels["HandsNotAt90"]
            else:
                label = keypoint_labels["HandsNotAtRightPosition"]
        else:
            label = keypoint_labels["Perfect"]

        for line_index, line in enumerate(label.split('\n')):
            cv2.putText(frame_copy, line, (10, 30 + line_index * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        cv2.imshow("Processed Video", frame_copy)

        # Tally the frame by its label.
        if label == keypoint_labels["Perfect"]:
            correct_frames += 1
        elif label in [keypoint_labels["HandsNotAtRightPosition"], keypoint_labels["HandsNotAt90"], keypoint_labels["LegNotAbove"]]:
            partially_correct_frames += 1
        elif label in [keypoint_labels["LegNotStraight"], keypoint_labels["Idle"]]:
            incorrect_frames += 1

        frame_count += 1
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the capture, the preview window and the MediaPipe graph.
    input_video.release()
    cv2.destroyAllWindows()
    pose.close()

# --- Score ------------------------------------------------------------------
if total_frames <= 0:
    # Some containers/backends do not report a frame count; use what was read.
    total_frames = frame_count
# Percentage contributed by one frame; guarded so an unreadable video yields 0.
step = 100 / total_frames if total_frames > 0 else 0.0

score1 = correct_frames * step                  # full credit
score2 = partially_correct_frames * step / 2    # half credit
score3 = incorrect_frames * step / 10           # penalty of a tenth of a frame's worth
final_score = max(0, score1 + score2 - score3)
print(f"Total Frames: {total_frames}")
print(f"Correct Frames: {correct_frames}")
print(f"Partially Correct Frames: {partially_correct_frames}")
print(f"Incorrect Frames: {incorrect_frames}")
print(f"Final Score: {final_score}")


Total Frames: 672
Correct Frames: 0
Partially Correct Frames: 18
Incorrect Frames: 24
Final Score: 0.9821428571428568


live - left hand & right leg

In [5]:
import cv2
import numpy as np
import mediapipe as mp

mp_pose = mp.solutions.pose

def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two items of each argument are read (x at index 0,
    y at index 1), so longer tuples such as ``(x, y, landmark_index)``
    are accepted as well.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)
# ---------------------------------------------------------------------------
# Score a live webcam session against a reference pose (left hand & right leg).
#
# Every frame's pose landmarks are compared with the reference landmarks after
# aligning both on landmark 0.  Each frame is labelled Correct / Partially
# Correct / Incorrect and the labels are folded into a 0-100 score.
# ---------------------------------------------------------------------------
frame_count = 0  # frames actually read from the camera

# --- Reference pose ---------------------------------------------------------
# Raw string: the Windows path contains backslashes that must not be parsed as
# escape sequences (the non-raw form triggers invalid-escape warnings).
reference_image_path = r'd:\One Drive\OneDrive\Pictures\Camera Roll\WIN_20231218_17_11_07_Pro.jpg'
# Alternative reference: r'dataset/dhruv_reference.jpg'
reference_image = cv2.imread(reference_image_path)
if reference_image is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError(f"Could not read reference image: {reference_image_path}")

# Maximum distance between a frame keypoint and the aligned reference keypoint.
# landmark.x / landmark.y are scaled by the frame size when drawn below, so the
# threshold is expressed in the same normalised units.
threshold = 0.1
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3, model_complexity=2)

reference_results = pose.process(cv2.cvtColor(reference_image, cv2.COLOR_BGR2RGB))

reference_landmarks = []
if reference_results.pose_landmarks:
    for i, landmark in enumerate(reference_results.pose_landmarks.landmark):
        reference_landmarks.append((landmark.x, landmark.y, i))
if not reference_landmarks:
    # Without reference landmarks there is nothing to compare frames against.
    pose.close()
    raise RuntimeError(f"No pose detected in reference image: {reference_image_path}")

# --- Input source -----------------------------------------------------------
input_video_path = r'dataset/ntest.mp4'  # not used while the webcam is the source
input_video = cv2.VideoCapture(0)
# To score a file instead: input_video = cv2.VideoCapture(input_video_path)

# Internal condition name -> text shown on the frame and used for scoring.
keypoint_labels = {
    "Perfect": "Correct",
    "HandsNotAtRightPosition": "Partially Correct",
    "HandsNotAt90": "Partially Correct",
    "LegNotAbove": "Partially Correct",
    "Idle": "Incorrect",
    "LegNotStraight": "Incorrect"
}

# For a live camera this is not a real frame count (the value depends on the
# capture backend); it is replaced by frame_count after the loop when it is
# not positive.
total_frames = int(input_video.get(cv2.CAP_PROP_FRAME_COUNT))

correct_frames = 0
partially_correct_frames = 0
incorrect_frames = 0

try:
    while True:
        ret, frame = input_video.read()
        if not ret:
            break

        frame_results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        frame_landmarks = []
        if frame_results.pose_landmarks:
            for i, landmark in enumerate(frame_results.pose_landmarks.landmark):
                frame_landmarks.append((landmark.x, landmark.y, i))

        frame_copy = frame.copy()
        img_h, img_w = frame.shape[:2]

        leg_not_above = False
        hands_not_at_right_position = False
        hands_not_at_90 = False
        leg_not_straight = False

        if frame_landmarks:
            # Translation that maps the frame's landmark 0 onto the reference's landmark 0.
            initial_offset = np.array(reference_landmarks[0][:2]) - np.array(frame_landmarks[0][:2])

        for frame_landmark in frame_landmarks:
            frame_point = frame_landmark[:2]
            frame_keypoint_number = frame_landmark[2]

            reference_point = reference_landmarks[frame_keypoint_number]
            adjusted_reference_point = (reference_point[0] - initial_offset[0],
                                        reference_point[1] - initial_offset[1])
            distance = calculate_distance(adjusted_reference_point, frame_point)

            center = (int(frame_point[0] * img_w), int(frame_point[1] * img_h))
            if distance < threshold:
                cv2.circle(frame_copy, center, 5, (0, 255, 0), -1)  # green: within threshold
                continue

            # Landmark indices follow the MediaPipe Pose landmark numbering.
            if frame_keypoint_number in [26, 28, 30, 32]:  # right knee / ankle / heel / foot index
                leg_not_above = True
            if frame_keypoint_number == 25:  # left knee
                leg_not_straight = True
            if frame_keypoint_number == 13:  # left elbow
                hands_not_at_90 = True
            if frame_keypoint_number in [15, 17, 19, 21]:  # left wrist / pinky / index / thumb
                hands_not_at_right_position = True
            cv2.circle(frame_copy, center, 5, (0, 0, 255), -1)  # red: outside threshold

        if not frame_landmarks:
            # No person detected: previously every flag stayed False and the
            # frame was scored as "Correct".  Treat it as an incorrect frame.
            label = keypoint_labels["Idle"]
        elif leg_not_above:
            if hands_not_at_90:
                label = keypoint_labels["Idle"]
            elif leg_not_straight:
                label = keypoint_labels["LegNotStraight"]
            else:
                label = keypoint_labels["LegNotAbove"]
        elif leg_not_straight:
            label = keypoint_labels["LegNotStraight"]
        elif hands_not_at_right_position:
            if hands_not_at_90:
                label = keypoint_labels["HandsNotAt90"]
            else:
                label = keypoint_labels["HandsNotAtRightPosition"]
        else:
            label = keypoint_labels["Perfect"]

        for line_index, line in enumerate(label.split('\n')):
            cv2.putText(frame_copy, line, (10, 30 + line_index * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        cv2.imshow("Processed Video", frame_copy)

        # Tally the frame by its label.
        if label == keypoint_labels["Perfect"]:
            correct_frames += 1
        elif label in [keypoint_labels["HandsNotAtRightPosition"], keypoint_labels["HandsNotAt90"], keypoint_labels["LegNotAbove"]]:
            partially_correct_frames += 1
        elif label in [keypoint_labels["LegNotStraight"], keypoint_labels["Idle"]]:
            incorrect_frames += 1

        frame_count += 1
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera, the preview window and the MediaPipe graph.
    input_video.release()
    cv2.destroyAllWindows()
    pose.close()

# --- Score ------------------------------------------------------------------
if total_frames <= 0:
    # Live capture: backends report -1 or 0 here, so use the frames actually read.
    total_frames = frame_count
# Percentage contributed by one frame; guarded so an empty session yields 0.
step = 100 / total_frames if total_frames > 0 else 0.0

score1 = correct_frames * step                  # full credit
score2 = partially_correct_frames * step / 2    # half credit
score3 = incorrect_frames * step / 10           # penalty of a tenth of a frame's worth
final_score = max(0, score1 + score2 - score3)
print(f"Total Frames: {total_frames}")
print(f"Correct Frames: {correct_frames}")
print(f"Partially Correct Frames: {partially_correct_frames}")
print(f"Incorrect Frames: {incorrect_frames}")
print(f"Final Score: {final_score}")

Total Frames: 164
Correct Frames: 59
Partially Correct Frames: 53
Incorrect Frames: 52
Final Score: 48.96341463414634


live - right hand & left leg

In [14]:
import cv2
import numpy as np
import mediapipe as mp

mp_pose = mp.solutions.pose

def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two items of each argument are read (x at index 0,
    y at index 1), so longer tuples such as ``(x, y, landmark_index)``
    are accepted as well.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)
# ---------------------------------------------------------------------------
# Score a live webcam session against a reference pose (right hand & left leg).
#
# Every frame's pose landmarks are compared with the reference landmarks after
# aligning both on landmark 0.  Each frame is labelled Correct / Partially
# Correct / Incorrect and the labels are folded into a 0-100 score.
# ---------------------------------------------------------------------------
frame_count = 0  # frames actually read from the camera

# --- Reference pose ---------------------------------------------------------
# Raw string: the Windows path contains backslashes that must not be parsed as
# escape sequences (the non-raw form triggers invalid-escape warnings).
reference_image_path = r'd:\One Drive\OneDrive\Pictures\Camera Roll\WIN_20231218_17_11_18_Pro.jpg'
# Alternative reference: r'dataset/dhruv_reference.jpg'
reference_image = cv2.imread(reference_image_path)
if reference_image is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError(f"Could not read reference image: {reference_image_path}")

# Maximum distance between a frame keypoint and the aligned reference keypoint.
# landmark.x / landmark.y are scaled by the frame size when drawn below, so the
# threshold is expressed in the same normalised units.
threshold = 0.1
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3, model_complexity=2)

reference_results = pose.process(cv2.cvtColor(reference_image, cv2.COLOR_BGR2RGB))

reference_landmarks = []
if reference_results.pose_landmarks:
    for i, landmark in enumerate(reference_results.pose_landmarks.landmark):
        reference_landmarks.append((landmark.x, landmark.y, i))
if not reference_landmarks:
    # Without reference landmarks there is nothing to compare frames against.
    pose.close()
    raise RuntimeError(f"No pose detected in reference image: {reference_image_path}")

# --- Input source -----------------------------------------------------------
input_video_path = r'dataset/ntest.mp4'  # not used while the webcam is the source
input_video = cv2.VideoCapture(0)
# To score a file instead: input_video = cv2.VideoCapture(input_video_path)

# Internal condition name -> text shown on the frame and used for scoring.
keypoint_labels = {
    "Perfect": "Correct",
    "HandsNotAtRightPosition": "Partially Correct",
    "HandsNotAt90": "Partially Correct",
    "LegNotAbove": "Partially Correct",
    "Idle": "Incorrect",
    "LegNotStraight": "Incorrect"
}

# For a live camera this is not a real frame count (the value depends on the
# capture backend); it is replaced by frame_count after the loop when it is
# not positive.
total_frames = int(input_video.get(cv2.CAP_PROP_FRAME_COUNT))

correct_frames = 0
partially_correct_frames = 0
incorrect_frames = 0

try:
    while True:
        ret, frame = input_video.read()
        if not ret:
            break

        frame_results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        frame_landmarks = []
        if frame_results.pose_landmarks:
            for i, landmark in enumerate(frame_results.pose_landmarks.landmark):
                frame_landmarks.append((landmark.x, landmark.y, i))

        frame_copy = frame.copy()
        img_h, img_w = frame.shape[:2]

        leg_not_above = False
        hands_not_at_right_position = False
        hands_not_at_90 = False
        leg_not_straight = False

        if frame_landmarks:
            # Translation that maps the frame's landmark 0 onto the reference's landmark 0.
            initial_offset = np.array(reference_landmarks[0][:2]) - np.array(frame_landmarks[0][:2])

        for frame_landmark in frame_landmarks:
            frame_point = frame_landmark[:2]
            frame_keypoint_number = frame_landmark[2]

            reference_point = reference_landmarks[frame_keypoint_number]
            adjusted_reference_point = (reference_point[0] - initial_offset[0],
                                        reference_point[1] - initial_offset[1])
            distance = calculate_distance(adjusted_reference_point, frame_point)

            center = (int(frame_point[0] * img_w), int(frame_point[1] * img_h))
            if distance < threshold:
                cv2.circle(frame_copy, center, 5, (0, 255, 0), -1)  # green: within threshold
                continue

            # Landmark indices follow the MediaPipe Pose landmark numbering.
            if frame_keypoint_number in [25, 27, 29, 31]:  # left knee / ankle / heel / foot index
                leg_not_above = True
            if frame_keypoint_number == 26:  # right knee
                leg_not_straight = True
            if frame_keypoint_number == 14:  # right elbow
                hands_not_at_90 = True
            if frame_keypoint_number in [16, 18, 20, 22]:  # right wrist / pinky / index / thumb
                hands_not_at_right_position = True
            cv2.circle(frame_copy, center, 5, (0, 0, 255), -1)  # red: outside threshold

        if not frame_landmarks:
            # No person detected: previously every flag stayed False and the
            # frame was scored as "Correct".  Treat it as an incorrect frame.
            label = keypoint_labels["Idle"]
        elif leg_not_above:
            if hands_not_at_90:
                label = keypoint_labels["Idle"]
            elif leg_not_straight:
                label = keypoint_labels["LegNotStraight"]
            else:
                label = keypoint_labels["LegNotAbove"]
        elif leg_not_straight:
            label = keypoint_labels["LegNotStraight"]
        elif hands_not_at_right_position:
            if hands_not_at_90:
                label = keypoint_labels["HandsNotAt90"]
            else:
                label = keypoint_labels["HandsNotAtRightPosition"]
        else:
            label = keypoint_labels["Perfect"]

        for line_index, line in enumerate(label.split('\n')):
            cv2.putText(frame_copy, line, (10, 30 + line_index * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        cv2.imshow("Processed Video", frame_copy)

        # Tally the frame by its label.
        if label == keypoint_labels["Perfect"]:
            correct_frames += 1
        elif label in [keypoint_labels["HandsNotAtRightPosition"], keypoint_labels["HandsNotAt90"], keypoint_labels["LegNotAbove"]]:
            partially_correct_frames += 1
        elif label in [keypoint_labels["LegNotStraight"], keypoint_labels["Idle"]]:
            incorrect_frames += 1

        frame_count += 1
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera, the preview window and the MediaPipe graph.
    input_video.release()
    cv2.destroyAllWindows()
    pose.close()

# --- Score ------------------------------------------------------------------
if total_frames <= 0:
    # Live capture: backends report -1 or 0 here, so use the frames actually read.
    total_frames = frame_count
# Percentage contributed by one frame; guarded so an empty session yields 0.
step = 100 / total_frames if total_frames > 0 else 0.0

score1 = correct_frames * step                  # full credit
score2 = partially_correct_frames * step / 2    # half credit
score3 = incorrect_frames * step / 10           # penalty of a tenth of a frame's worth
final_score = max(0, score1 + score2 - score3)
print(f"Total Frames: {total_frames}")
print(f"Correct Frames: {correct_frames}")
print(f"Partially Correct Frames: {partially_correct_frames}")
print(f"Incorrect Frames: {incorrect_frames}")
print(f"Final Score: {final_score}")

Total Frames: 284
Correct Frames: 50
Partially Correct Frames: 8
Incorrect Frames: 226
Final Score: 11.056338028169014


advanced for input video - right leg in air

In [2]:
import cv2
import numpy as np
import mediapipe as mp

mp_pose = mp.solutions.pose

def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two items of each argument are read (x at index 0,
    y at index 1), so longer tuples such as ``(x, y, landmark_index)``
    are accepted as well.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)
# ---------------------------------------------------------------------------
# Score a recorded video against the "advanced" reference pose (right leg in
# air) and save the annotated frames to a new video file.
#
# Every frame's pose landmarks are compared with the reference landmarks after
# aligning both on landmark 0.  Each frame is labelled Correct / Partially
# Correct / Incorrect and the labels are folded into a 0-100 score.
# ---------------------------------------------------------------------------
frame_count = 0  # frames actually processed

# --- Reference pose ---------------------------------------------------------
# Raw string: the Windows path contains backslashes that must not be parsed as
# escape sequences (the non-raw form triggers invalid-escape warnings).
reference_image_path = r'd:\One Drive\OneDrive\Pictures\Camera Roll\WIN_20231218_17_11_37_Pro.jpg'
# Alternative reference: r'dataset/dhruv_reference.jpg'
reference_image = cv2.imread(reference_image_path)
if reference_image is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError(f"Could not read reference image: {reference_image_path}")

# Maximum distance between a frame keypoint and the aligned reference keypoint,
# in the same normalised units as landmark.x / landmark.y.
threshold = 0.05 #0.13 for antest1
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3, model_complexity=2)

reference_results = pose.process(cv2.cvtColor(reference_image, cv2.COLOR_BGR2RGB))

reference_landmarks = []
if reference_results.pose_landmarks:
    for i, landmark in enumerate(reference_results.pose_landmarks.landmark):
        reference_landmarks.append((landmark.x, landmark.y, i))
if not reference_landmarks:
    # Without reference landmarks there is nothing to compare frames against.
    pose.close()
    raise RuntimeError(f"No pose detected in reference image: {reference_image_path}")

# --- Input / output video ---------------------------------------------------
input_video_path = r'dataset/antest.mp4'
# Alternative sources: r'dataset/test.mp4', or cv2.VideoCapture(0) for a webcam.
input_video = cv2.VideoCapture(input_video_path)
output_video_path = 'advancedvriksh/natrajadvtest.mp4'  # Choose your desired output file name
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fps = int(input_video.get(cv2.CAP_PROP_FPS))
frame_width = int(input_video.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(input_video.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Writer for the annotated output; it mirrors the input's size and (truncated) fps.
output_video = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
if not output_video.isOpened():
    # VideoWriter fails silently (e.g. missing output directory or codec);
    # surface it so an empty output file is not a surprise.
    print(f"Warning: could not open output video for writing: {output_video_path}")

# Internal condition name -> text shown on the frame and used for scoring.
keypoint_labels = {
    "Perfect": "Correct",
    "HandsNotAtRightPosition": "Partially Correct",
    "HandsNotAt90": "Partially Correct",
    "LegNotAbove": "Partially Correct",
    "KneeNotAbove": "Partially Correct",
    "Idle": "Incorrect",
    "LegNotStraight": "Incorrect",
    "BothWrong": "Incorrect"
}

# Frame count reported by the container; it stays the score denominator even
# when the run is stopped early with "q".
total_frames = int(input_video.get(cv2.CAP_PROP_FRAME_COUNT))

correct_frames = 0
partially_correct_frames = 0
incorrect_frames = 0

try:
    while True:
        ret, frame = input_video.read()
        if not ret:
            break

        frame_results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        frame_landmarks = []
        if frame_results.pose_landmarks:
            for i, landmark in enumerate(frame_results.pose_landmarks.landmark):
                frame_landmarks.append((landmark.x, landmark.y, i))

        frame_copy = frame.copy()
        img_h, img_w = frame.shape[:2]

        leg_not_above = False
        hands_not_at_right_position = False
        hands_not_at_90 = False
        knee_not_above = False
        leg_not_straight = False

        if frame_landmarks:
            # Translation that maps the frame's landmark 0 onto the reference's landmark 0.
            initial_offset = np.array(reference_landmarks[0][:2]) - np.array(frame_landmarks[0][:2])

        for frame_landmark in frame_landmarks:
            frame_point = frame_landmark[:2]
            frame_keypoint_number = frame_landmark[2]

            reference_point = reference_landmarks[frame_keypoint_number]
            adjusted_reference_point = (reference_point[0] - initial_offset[0],
                                        reference_point[1] - initial_offset[1])
            distance = calculate_distance(adjusted_reference_point, frame_point)

            center = (int(frame_point[0] * img_w), int(frame_point[1] * img_h))
            if distance < threshold:
                cv2.circle(frame_copy, center, 5, (0, 255, 0), -1)  # green: within threshold
                continue

            # Landmark indices follow the MediaPipe Pose landmark numbering.
            if frame_keypoint_number in [28, 30, 32]:  # right ankle / heel / foot index
                leg_not_above = True
            if frame_keypoint_number == 26:  # right knee
                knee_not_above = True
            if frame_keypoint_number == 25:  # left knee
                leg_not_straight = True
            if frame_keypoint_number in [13, 14]:  # left / right elbow
                hands_not_at_90 = True
            if frame_keypoint_number in [15, 17, 19, 21, 16, 18, 20, 22]:  # wrists and hand points, both sides
                hands_not_at_right_position = True
            cv2.circle(frame_copy, center, 5, (0, 0, 255), -1)  # red: outside threshold

        if not frame_landmarks:
            # No person detected: previously every flag stayed False and the
            # frame was scored as "Correct".  Treat it as an incorrect frame.
            label = keypoint_labels["Idle"]
        elif leg_not_above:
            if hands_not_at_90:
                if knee_not_above:
                    label = keypoint_labels["Idle"]
                else:
                    label = keypoint_labels["BothWrong"]
            else:
                label = keypoint_labels["LegNotAbove"]
        elif leg_not_straight:
            label = keypoint_labels["LegNotStraight"]
        elif knee_not_above:
            label = keypoint_labels["KneeNotAbove"]
        elif hands_not_at_right_position:
            if hands_not_at_90:
                label = keypoint_labels["HandsNotAt90"]
            else:
                label = keypoint_labels["HandsNotAtRightPosition"]
        else:
            label = keypoint_labels["Perfect"]

        for line_index, line in enumerate(label.split('\n')):
            cv2.putText(frame_copy, line, (10, 30 + line_index * 30), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255, 255, 255), 2)
        output_video.write(frame_copy)
        cv2.imshow("Processed Video", frame_copy)

        # Tally the frame by its label.
        if label == keypoint_labels["Perfect"]:
            correct_frames += 1
        elif label in [keypoint_labels["HandsNotAtRightPosition"], keypoint_labels["HandsNotAt90"], keypoint_labels["LegNotAbove"], keypoint_labels["KneeNotAbove"]]:
            partially_correct_frames += 1
        elif label in [keypoint_labels["LegNotStraight"], keypoint_labels["Idle"], keypoint_labels["BothWrong"]]:
            incorrect_frames += 1

        frame_count += 1
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the capture, finalise the output file, close the preview
    # window and release the MediaPipe graph.
    input_video.release()
    output_video.release()
    cv2.destroyAllWindows()
    pose.close()

# --- Score ------------------------------------------------------------------
if total_frames <= 0:
    # Some containers/backends do not report a frame count; use what was read.
    total_frames = frame_count
# Percentage contributed by one frame; guarded so an unreadable video yields 0.
step = 100 / total_frames if total_frames > 0 else 0.0

score1 = correct_frames * step                     # full credit
# NOTE(review): partial credit is step/1.25 here but step/2 in the other cells — confirm this is intended.
score2 = partially_correct_frames * step / 1.25
score3 = incorrect_frames * step / 10              # penalty of a tenth of a frame's worth
final_score = max(0, score1 + score2 - score3)
print(f"Total Frames: {total_frames}")
print(f"Correct Frames: {correct_frames}")
print(f"Partially Correct Frames: {partially_correct_frames}")
print(f"Incorrect Frames: {incorrect_frames}")
print(f"Final Score: {final_score}")

Total Frames: 193
Correct Frames: 50
Partially Correct Frames: 65
Incorrect Frames: 78
Final Score: 48.80829015544041


advanced live - right leg in air

In [1]:
import cv2
import numpy as np
import mediapipe as mp

mp_pose = mp.solutions.pose

def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two items of each argument are read (x at index 0,
    y at index 1), so longer tuples such as ``(x, y, landmark_index)``
    are accepted as well.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)
# ---------------------------------------------------------------------------
# Score a live webcam session against the "advanced" reference pose
# (right leg in air).
#
# Every frame's pose landmarks are compared with the reference landmarks after
# aligning both on landmark 0.  Each frame is labelled Correct / Partially
# Correct / Incorrect and the labels are folded into a 0-100 score.
# ---------------------------------------------------------------------------
frame_count = 0  # frames actually read from the camera

# --- Reference pose ---------------------------------------------------------
# Raw string: the Windows path contains backslashes that must not be parsed as
# escape sequences (the non-raw form triggers invalid-escape warnings).
reference_image_path = r'd:\One Drive\OneDrive\Pictures\Camera Roll\WIN_20231218_17_11_48_Pro.jpg'
# Alternative reference: r'dataset/dhruv_reference.jpg'
reference_image = cv2.imread(reference_image_path)
if reference_image is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError(f"Could not read reference image: {reference_image_path}")

# Maximum distance between a frame keypoint and the aligned reference keypoint.
# landmark.x / landmark.y are scaled by the frame size when drawn below, so the
# threshold is expressed in the same normalised units.
threshold = 0.1
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3, model_complexity=2)

reference_results = pose.process(cv2.cvtColor(reference_image, cv2.COLOR_BGR2RGB))

reference_landmarks = []
if reference_results.pose_landmarks:
    for i, landmark in enumerate(reference_results.pose_landmarks.landmark):
        reference_landmarks.append((landmark.x, landmark.y, i))
if not reference_landmarks:
    # Without reference landmarks there is nothing to compare frames against.
    pose.close()
    raise RuntimeError(f"No pose detected in reference image: {reference_image_path}")

# --- Input source -----------------------------------------------------------
input_video_path = r'dataset/antest.mp4'  # not used while the webcam is the source
input_video = cv2.VideoCapture(0)
# To score a file instead: input_video = cv2.VideoCapture(input_video_path)

# Internal condition name -> text shown on the frame and used for scoring.
keypoint_labels = {
    "Perfect": "Correct",
    "HandsNotAtRightPosition": "Partially Correct",
    "HandsNotAt90": "Partially Correct",
    "LegNotAbove": "Partially Correct",
    "KneeNotAbove": "Partially Correct",
    "Idle": "Incorrect",
    "LegNotStraight": "Incorrect",
    "BothWrong": "Incorrect"
}

# For a live camera this is not a real frame count (the value depends on the
# capture backend); it is replaced by frame_count after the loop when it is
# not positive.
total_frames = int(input_video.get(cv2.CAP_PROP_FRAME_COUNT))

correct_frames = 0
partially_correct_frames = 0
incorrect_frames = 0

try:
    while True:
        ret, frame = input_video.read()
        if not ret:
            break

        frame_results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        frame_landmarks = []
        if frame_results.pose_landmarks:
            for i, landmark in enumerate(frame_results.pose_landmarks.landmark):
                frame_landmarks.append((landmark.x, landmark.y, i))

        frame_copy = frame.copy()
        img_h, img_w = frame.shape[:2]

        leg_not_above = False
        hands_not_at_right_position = False
        hands_not_at_90 = False
        knee_not_above = False
        leg_not_straight = False

        if frame_landmarks:
            # Translation that maps the frame's landmark 0 onto the reference's landmark 0.
            initial_offset = np.array(reference_landmarks[0][:2]) - np.array(frame_landmarks[0][:2])

        for frame_landmark in frame_landmarks:
            frame_point = frame_landmark[:2]
            frame_keypoint_number = frame_landmark[2]

            reference_point = reference_landmarks[frame_keypoint_number]
            adjusted_reference_point = (reference_point[0] - initial_offset[0],
                                        reference_point[1] - initial_offset[1])
            distance = calculate_distance(adjusted_reference_point, frame_point)

            center = (int(frame_point[0] * img_w), int(frame_point[1] * img_h))
            if distance < threshold:
                cv2.circle(frame_copy, center, 5, (0, 255, 0), -1)  # green: within threshold
                continue

            # Landmark indices follow the MediaPipe Pose landmark numbering.
            if frame_keypoint_number in [28, 30, 32]:  # right ankle / heel / foot index
                leg_not_above = True
            if frame_keypoint_number == 26:  # right knee
                knee_not_above = True
            if frame_keypoint_number == 25:  # left knee
                leg_not_straight = True
            if frame_keypoint_number in [13, 14]:  # left / right elbow
                hands_not_at_90 = True
            if frame_keypoint_number in [15, 17, 19, 21, 16, 18, 20, 22]:  # wrists and hand points, both sides
                hands_not_at_right_position = True
            cv2.circle(frame_copy, center, 5, (0, 0, 255), -1)  # red: outside threshold

        if not frame_landmarks:
            # No person detected: previously every flag stayed False and the
            # frame was scored as "Correct".  Treat it as an incorrect frame.
            label = keypoint_labels["Idle"]
        elif leg_not_above:
            if hands_not_at_90:
                if knee_not_above:
                    label = keypoint_labels["Idle"]
                else:
                    label = keypoint_labels["BothWrong"]
            else:
                label = keypoint_labels["LegNotAbove"]
        elif leg_not_straight:
            label = keypoint_labels["LegNotStraight"]
        elif knee_not_above:
            label = keypoint_labels["KneeNotAbove"]
        elif hands_not_at_right_position:
            if hands_not_at_90:
                label = keypoint_labels["HandsNotAt90"]
            else:
                label = keypoint_labels["HandsNotAtRightPosition"]
        else:
            label = keypoint_labels["Perfect"]

        for line_index, line in enumerate(label.split('\n')):
            cv2.putText(frame_copy, line, (10, 30 + line_index * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Shown once per frame (the original called imshow twice with the same image).
        cv2.imshow("Processed Video", frame_copy)

        # Tally the frame by its label.
        if label == keypoint_labels["Perfect"]:
            correct_frames += 1
        elif label in [keypoint_labels["HandsNotAtRightPosition"], keypoint_labels["HandsNotAt90"], keypoint_labels["LegNotAbove"], keypoint_labels["KneeNotAbove"]]:
            partially_correct_frames += 1
        elif label in [keypoint_labels["LegNotStraight"], keypoint_labels["Idle"], keypoint_labels["BothWrong"]]:
            incorrect_frames += 1

        frame_count += 1
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera, the preview window and the MediaPipe graph.
    input_video.release()
    cv2.destroyAllWindows()
    pose.close()

# --- Score ------------------------------------------------------------------
if total_frames <= 0:
    # Live capture: backends report -1 or 0 here, so use the frames actually read.
    total_frames = frame_count
# Percentage contributed by one frame; guarded so an empty session yields 0.
step = 100 / total_frames if total_frames > 0 else 0.0

score1 = correct_frames * step                  # full credit
score2 = partially_correct_frames * step / 2    # half credit
score3 = incorrect_frames * step / 10           # penalty of a tenth of a frame's worth
final_score = max(0, score1 + score2 - score3)
print(f"Total Frames: {total_frames}")
print(f"Correct Frames: {correct_frames}")
print(f"Partially Correct Frames: {partially_correct_frames}")
print(f"Incorrect Frames: {incorrect_frames}")
print(f"Final Score: {final_score}")

Total Frames: 45
Correct Frames: 0
Partially Correct Frames: 0
Incorrect Frames: 45
Final Score: 0


advanced live - left leg in air

In [8]:
import cv2
import numpy as np
import mediapipe as mp

mp_pose = mp.solutions.pose

def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two items of each argument are read (x at index 0,
    y at index 1), so longer tuples such as ``(x, y, landmark_index)``
    are accepted as well.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)
# ---------------------------------------------------------------------------
# Score a live webcam session against the "advanced" reference pose
# (left leg in air).
#
# Every frame's pose landmarks are compared with the reference landmarks after
# aligning both on landmark 0.  Each frame is labelled Correct / Partially
# Correct / Incorrect and the labels are folded into a 0-100 score.
# ---------------------------------------------------------------------------
frame_count = 0  # frames actually read from the camera

# --- Reference pose ---------------------------------------------------------
# Raw string: the Windows path contains backslashes that must not be parsed as
# escape sequences (the non-raw form triggers invalid-escape warnings).
# NOTE(review): this is the same file the "advanced for input video - right leg
# in air" cell uses as its reference — confirm it really shows the left-leg pose.
reference_image_path = r'd:\One Drive\OneDrive\Pictures\Camera Roll\WIN_20231218_17_11_37_Pro.jpg'
# Alternative reference: r'dataset/dhruv_reference.jpg'
reference_image = cv2.imread(reference_image_path)
if reference_image is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError(f"Could not read reference image: {reference_image_path}")

# Maximum distance between a frame keypoint and the aligned reference keypoint.
# landmark.x / landmark.y are scaled by the frame size when drawn below, so the
# threshold is expressed in the same normalised units.
threshold = 0.1
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3, model_complexity=2)

reference_results = pose.process(cv2.cvtColor(reference_image, cv2.COLOR_BGR2RGB))

reference_landmarks = []
if reference_results.pose_landmarks:
    for i, landmark in enumerate(reference_results.pose_landmarks.landmark):
        reference_landmarks.append((landmark.x, landmark.y, i))
if not reference_landmarks:
    # Without reference landmarks there is nothing to compare frames against.
    pose.close()
    raise RuntimeError(f"No pose detected in reference image: {reference_image_path}")

# --- Input source -----------------------------------------------------------
input_video_path = r'dataset/antest.mp4'  # not used while the webcam is the source
input_video = cv2.VideoCapture(0)
# To score a file instead: input_video = cv2.VideoCapture(input_video_path)

# Internal condition name -> text shown on the frame and used for scoring.
keypoint_labels = {
    "Perfect": "Correct",
    "HandsNotAtRightPosition": "Partially Correct",
    "HandsNotAt90": "Partially Correct",
    "LegNotAbove": "Partially Correct",
    "KneeNotAbove": "Partially Correct",
    "Idle": "Incorrect",
    "LegNotStraight": "Incorrect",
    "BothWrong": "Incorrect"
}

# For a live camera this is not a real frame count (the value depends on the
# capture backend); it is replaced by frame_count after the loop when it is
# not positive.
total_frames = int(input_video.get(cv2.CAP_PROP_FRAME_COUNT))

correct_frames = 0
partially_correct_frames = 0
incorrect_frames = 0

try:
    while True:
        ret, frame = input_video.read()
        if not ret:
            break

        frame_results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        frame_landmarks = []
        if frame_results.pose_landmarks:
            for i, landmark in enumerate(frame_results.pose_landmarks.landmark):
                frame_landmarks.append((landmark.x, landmark.y, i))

        frame_copy = frame.copy()
        img_h, img_w = frame.shape[:2]

        leg_not_above = False
        hands_not_at_right_position = False
        hands_not_at_90 = False
        knee_not_above = False
        leg_not_straight = False

        if frame_landmarks:
            # Translation that maps the frame's landmark 0 onto the reference's landmark 0.
            initial_offset = np.array(reference_landmarks[0][:2]) - np.array(frame_landmarks[0][:2])

        for frame_landmark in frame_landmarks:
            frame_point = frame_landmark[:2]
            frame_keypoint_number = frame_landmark[2]

            reference_point = reference_landmarks[frame_keypoint_number]
            adjusted_reference_point = (reference_point[0] - initial_offset[0],
                                        reference_point[1] - initial_offset[1])
            distance = calculate_distance(adjusted_reference_point, frame_point)

            center = (int(frame_point[0] * img_w), int(frame_point[1] * img_h))
            if distance < threshold:
                cv2.circle(frame_copy, center, 5, (0, 255, 0), -1)  # green: within threshold
                continue

            # Landmark indices follow the MediaPipe Pose landmark numbering.
            if frame_keypoint_number in [27, 29, 31]:  # left ankle / heel / foot index
                leg_not_above = True
            if frame_keypoint_number == 25:  # left knee
                knee_not_above = True
            if frame_keypoint_number == 26:  # right knee
                leg_not_straight = True
            if frame_keypoint_number in [13, 14]:  # left / right elbow
                hands_not_at_90 = True
            if frame_keypoint_number in [15, 17, 19, 21, 16, 18, 20, 22]:  # wrists and hand points, both sides
                hands_not_at_right_position = True
            cv2.circle(frame_copy, center, 5, (0, 0, 255), -1)  # red: outside threshold

        if not frame_landmarks:
            # No person detected: previously every flag stayed False and the
            # frame was scored as "Correct".  Treat it as an incorrect frame.
            label = keypoint_labels["Idle"]
        elif leg_not_above:
            if hands_not_at_90:
                if knee_not_above:
                    label = keypoint_labels["Idle"]
                else:
                    label = keypoint_labels["BothWrong"]
            else:
                label = keypoint_labels["LegNotAbove"]
        elif leg_not_straight:
            label = keypoint_labels["LegNotStraight"]
        elif knee_not_above:
            label = keypoint_labels["KneeNotAbove"]
        elif hands_not_at_right_position:
            if hands_not_at_90:
                label = keypoint_labels["HandsNotAt90"]
            else:
                label = keypoint_labels["HandsNotAtRightPosition"]
        else:
            label = keypoint_labels["Perfect"]

        for line_index, line in enumerate(label.split('\n')):
            cv2.putText(frame_copy, line, (10, 30 + line_index * 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Shown once per frame (the original called imshow twice with the same image).
        cv2.imshow("Processed Video", frame_copy)

        # Tally the frame by its label.
        if label == keypoint_labels["Perfect"]:
            correct_frames += 1
        elif label in [keypoint_labels["HandsNotAtRightPosition"], keypoint_labels["HandsNotAt90"], keypoint_labels["LegNotAbove"], keypoint_labels["KneeNotAbove"]]:
            partially_correct_frames += 1
        elif label in [keypoint_labels["LegNotStraight"], keypoint_labels["Idle"], keypoint_labels["BothWrong"]]:
            incorrect_frames += 1

        frame_count += 1
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera, the preview window and the MediaPipe graph.
    input_video.release()
    cv2.destroyAllWindows()
    pose.close()

# --- Score ------------------------------------------------------------------
if total_frames <= 0:
    # Live capture: backends report -1 or 0 here, so use the frames actually read.
    total_frames = frame_count
# Percentage contributed by one frame; guarded so an empty session yields 0.
step = 100 / total_frames if total_frames > 0 else 0.0

score1 = correct_frames * step                  # full credit
score2 = partially_correct_frames * step / 2    # half credit
score3 = incorrect_frames * step / 10           # penalty of a tenth of a frame's worth
final_score = max(0, score1 + score2 - score3)
print(f"Total Frames: {total_frames}")
print(f"Correct Frames: {correct_frames}")
print(f"Partially Correct Frames: {partially_correct_frames}")
print(f"Incorrect Frames: {incorrect_frames}")
print(f"Final Score: {final_score}")

Total Frames: 87
Correct Frames: 10
Partially Correct Frames: 1
Incorrect Frames: 76
Final Score: 3.333333333333334
