### Use the trained yoga-pose classification model to predict a person's pose from an image and give feedback on joint angles
#### Author: Shlok Arjun Marathe
#### Date: 6th December 2024

In [1]:
import cv2
import numpy as np
import mediapipe as mp
import torch
from torchvision import transforms, models
from PIL import Image

In [2]:
# Function to calculate the angle between three points
def calculate_angle(a, b, c):
    """Return the angle, in degrees, formed at point ``b`` by points ``a`` and ``c``.

    Each argument is an object exposing ``x`` and ``y`` attributes. The angle
    is obtained as the difference between the headings (via ``arctan2``) of
    the rays b->c and b->a, and is folded into the range [0, 180].
    """
    vertex_x, vertex_y = b.x, b.y

    # Heading of each ray leaving the vertex, measured from the +x axis
    heading_to_a = np.arctan2(a.y - vertex_y, a.x - vertex_x)
    heading_to_c = np.arctan2(c.y - vertex_y, c.x - vertex_x)

    angle = np.abs(np.degrees(heading_to_c - heading_to_a))

    # A reflex angle and its explement describe the same joint bend
    return 360 - angle if angle > 180.0 else angle

In [3]:
# Function to detect the correctness of the pose and provide feedback
def provide_feedback(detected_pose, landmarks, ideal_angles, tolerance=15):
    """Compare measured joint angles against ideal angles and describe deviations.

    Args:
        detected_pose: Name of the detected pose. Currently unused; kept so
            existing callers keep working.
        landmarks: Sequence of landmark objects (each with ``x`` and ``y``
            attributes) indexable by ``mp.solutions.pose.PoseLandmark`` values.
        ideal_angles: Mapping of joint name to ideal angle in degrees.
            Supported joint names are ``"left_elbow"``, ``"right_elbow"``,
            ``"left_knee"`` and ``"right_knee"``.
        tolerance: Largest deviation, in degrees, that is still accepted
            without feedback. Defaults to 15.

    Returns:
        A list of feedback strings, one per joint whose deviation exceeds
        ``tolerance``, or a single congratulatory message when none does.

    Raises:
        ValueError: If ``ideal_angles`` contains a joint name that is not
            supported. Previously such a key either crashed with an unbound
            ``angle`` or silently reused the previous joint's angle.
    """
    lm = mp.solutions.pose.PoseLandmark

    # For each joint: (first end point, vertex, second end point)
    joint_landmarks = {
        "left_elbow": (lm.LEFT_SHOULDER, lm.LEFT_ELBOW, lm.LEFT_WRIST),
        "right_elbow": (lm.RIGHT_SHOULDER, lm.RIGHT_ELBOW, lm.RIGHT_WRIST),
        "left_knee": (lm.LEFT_HIP, lm.LEFT_KNEE, lm.LEFT_ANKLE),
        "right_knee": (lm.RIGHT_HIP, lm.RIGHT_KNEE, lm.RIGHT_ANKLE),
    }

    feedback = []

    # Check the angle of each joint
    for joint, ideal_angle in ideal_angles.items():
        try:
            first, vertex, last = joint_landmarks[joint]
        except KeyError:
            raise ValueError(
                f"Unsupported joint {joint!r}; expected one of {sorted(joint_landmarks)}"
            ) from None

        angle = calculate_angle(
            landmarks[first.value],
            landmarks[vertex.value],
            landmarks[last.value],
        )

        # Check if the angle is within the acceptable range
        deviation = abs(angle - ideal_angle)
        if deviation > tolerance:
            feedback.append(f"{joint} angle is off by {deviation:.1f} degrees.")

    # If no feedback is provided, the pose is correct
    if not feedback:
        feedback.append("Your pose looks great!")

    return feedback


In [4]:
# Declare the correct angles for each pose
# Target joint angles (in degrees) for every supported pose, keyed first by
# pose name and then by joint name.
ideal_pose_angles = {
    "Warrior": dict(left_elbow=180, right_elbow=180, left_knee=130, right_knee=180),
    "Tree": dict(left_elbow=180, right_elbow=180, left_knee=180, right_knee=45),
    "Plank": dict(left_elbow=90, right_elbow=90, left_knee=180, right_knee=180),
    "Goddess": dict(left_elbow=90, right_elbow=90, left_knee=150, right_knee=150),
    "Downward Dog": dict(left_elbow=180, right_elbow=180, left_knee=180, right_knee=180),
}

In [5]:
# Load the model for yoga pose classification
# Build a ResNet-18 without pretrained weights (`weights=None` replaces the
# deprecated `pretrained=False`) and swap in a 5-way classification head.
model = models.resnet18(weights=None)
num_features = model.fc.in_features
model.fc = torch.nn.Linear(num_features, 5)

# map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only
# machine; weights_only=True restricts unpickling to tensors and plain
# containers, avoiding arbitrary code execution from the checkpoint file.
model.load_state_dict(
    torch.load("yoga_pose_model.pth", map_location="cpu", weights_only=True)
)

# Switch to inference mode (affects layers such as BatchNorm)
model.eval()

  model.load_state_dict(torch.load("yoga_pose_model.pth"))


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [6]:
# Define the classes and the mediapipe objects
# MediaPipe pose solution and its landmark-drawing helpers
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# Pose labels; the classifier's predicted index is used to look up this list,
# so the order presumably has to match the label order used in training
# (TODO confirm).
class_names = [
    "Downward Dog",
    "Goddess",
    "Plank",
    "Tree",
    "Warrior",
]

In [7]:
# Define the preprocessing steps for the image
# Pipeline applied to a PIL image before classification: resize to 224x224,
# convert to a tensor, then normalise each channel.
# NOTE(review): the mean/std values look like the usual ImageNet statistics;
# confirm they match what was used during training.
preprocess = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [8]:
# Load the image and convert it to RGB
image_path = "./image1.jpg"
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # rather than raising, which would otherwise surface as an opaque
    # cvtColor error on the next line.
    raise FileNotFoundError(f"Could not read image at {image_path!r}")

# OpenCV loads images as BGR; build a separate RGB copy for downstream use
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

In [9]:
# Process the image with the pose model
# A single still image is processed, so run MediaPipe in static-image mode.
# min_tracking_confidence is dropped because it only applies to video tracking.
with mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5) as pose:
    results = pose.process(image_rgb)

    if results.pose_landmarks:
        # Draw landmarks on the BGR image that will be displayed; image_rgb is
        # a separate array, so the classifier below sees the undecorated photo.
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

        # Landmarks converted from normalised to pixel coordinates.
        # NOTE(review): not used anywhere in the visible cells; kept in case
        # later cells rely on it.
        height, width, _ = image.shape
        landmarks = [(int(lm.x * width), int(lm.y * height)) for lm in results.pose_landmarks.landmark]

        # Classify pose: add a batch dimension and take the arg-max class
        img_pil = Image.fromarray(image_rgb)
        img_tensor = preprocess(img_pil).unsqueeze(0)
        with torch.no_grad():
            outputs = model(img_tensor)
            _, predicted = torch.max(outputs, 1)
            detected_pose = class_names[predicted.item()]

        # Provide feedback based on the normalised landmarks (angles do not
        # need pixel coordinates)
        feedback = provide_feedback(detected_pose, results.pose_landmarks.landmark, ideal_pose_angles[detected_pose])

        # Annotate the image with the pose name, then one feedback line per row
        cv2.putText(image, f"Pose: {detected_pose}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        y_offset = 60
        for line in feedback:
            cv2.putText(image, line, (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
            y_offset += 20

        # Show the annotated image and block until a key is pressed
        cv2.imshow("Pose Detection with Feedback", image)
        cv2.waitKey(0)
    else:
        # Previously the cell finished silently when no person was found
        print("No pose landmarks were detected in the image.")

# Close all windows
cv2.destroyAllWindows()

I0000 00:00:1733675757.726831 28340308 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M1 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1733675757.799860 28347258 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733675757.813057 28347263 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733675757.835452 28347260 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.
2024-12-09 00:35:57.969 Python[56552:28340308] ApplePersistenceIgnoreState: Existing state will not be touched. New state will be written to /var/folders/jl/sw3zc9fx1m50mbphy9kfgpq00000gn/T/org.python.python.savedState
2024-12-09 00:35:58.355 Python[565