## Make Detection with the Trained Model

In [1]:
import mediapipe as mp
import cv2
import numpy as np
import pandas as pd

import pickle

import warnings
# Install a blanket "ignore" filter so no Python warnings are shown from here on
warnings.filterwarnings('ignore')

# MediaPipe aliases used by the cells below:
#   mp_drawing - drawing utilities (draw_landmarks, DrawingSpec)
#   mp_pose    - pose solution (Pose, PoseLandmark, POSE_CONNECTIONS)
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

objc[79753]: Class CaptureDelegate is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_videoio.3.4.16.dylib (0x107da4860) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x15eece480). One of the two will be used. Which one is undefined.
objc[79753]: Class CVWindow is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x1077c4a68) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x15eece4d0). One of the two will be used. Which one is undefined.
objc[79753]: Class CVView is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x1077c4a90) and /Users/fuixlabsdev1/Programming/PP/graduation-th

### Reconstruct the input structure

In [2]:
# Names of the MediaPipe pose landmarks that are used as model input for plank
IMPORTANT_LMS = [
    "NOSE",
    "LEFT_SHOULDER", "RIGHT_SHOULDER",
    "LEFT_ELBOW", "RIGHT_ELBOW",
    "LEFT_WRIST", "RIGHT_WRIST",
    "LEFT_HIP", "RIGHT_HIP",
    "LEFT_KNEE", "RIGHT_KNEE",
    "LEFT_ANKLE", "RIGHT_ANKLE",
    "LEFT_HEEL", "RIGHT_HEEL",
    "LEFT_FOOT_INDEX", "RIGHT_FOOT_INDEX",
]

# Column names of the data frame: the label first, then four columns
# (x, y, z, visibility) per landmark, in the order of IMPORTANT_LMS
HEADERS = ["label"]

for lm in IMPORTANT_LMS:
    prefix = lm.lower()
    HEADERS.extend(f"{prefix}_{suffix}" for suffix in ("x", "y", "z", "v"))

### Set up some important functions

In [3]:
def extract_important_keypoints(results) -> list:
    '''
    Collect the x, y, z and visibility values of every landmark named in
    IMPORTANT_LMS from a mediapipe pose result and return them as one flat list
    (four consecutive values per landmark, in IMPORTANT_LMS order).
    '''
    pose_points = results.pose_landmarks.landmark

    rows = [
        [point.x, point.y, point.z, point.visibility]
        for point in (pose_points[mp_pose.PoseLandmark[name].value] for name in IMPORTANT_LMS)
    ]

    # Flatten the (landmark, 4) table into a single list of numbers
    return np.array(rows).ravel().tolist()


def rescale_frame(frame, percent=50):
    '''
    Rescale a frame to a given percentage of its original size.

    Args:
        frame: image array whose shape starts with (height, width).
        percent: target size as a percentage of the original; must be > 0.

    Returns:
        The resized frame produced by cv2.resize with INTER_AREA interpolation.

    Raises:
        ValueError: if percent is not positive.
    '''
    if percent <= 0:
        raise ValueError(f"percent must be positive, got {percent}")

    # int() truncates, so a small frame or percentage could yield a 0-pixel
    # dimension, which cv2.resize rejects; keep at least one pixel per axis.
    width = max(1, int(frame.shape[1] * percent / 100))
    height = max(1, int(frame.shape[0] * percent / 100))
    return cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)

In [8]:
# Other clip paths, left commented out:
# VIDEO_PATH1 = "../data/plank/plank_test.mov"
# VIDEO_PATH2 = "../data/plank/plank_test_1.mp4"
# VIDEO_PATH3 = "../data/plank/plank_test_2.mp4"
# VIDEO_PATH4 = "../data/plank/plank_test_3.mp4"
# VIDEO_PATH5 = "../data/plank/plank_test_4.mp4"
# Video file that the detection cell below opens with cv2.VideoCapture
VIDEO_TEST = "../../demo/plank_demo.mp4"

## 1. Make detection with a scikit-learn model

In [11]:
# Load model
# NOTE(review): presumably the trained scikit-learn classifier (the file name
# suggests logistic regression) — confirm against the training notebook.
# pickle.load can execute arbitrary code while unpickling, so only load files
# that come from a trusted source.
with open("./model/LR_model.pkl", "rb") as f:
    sklearn_model = pickle.load(f)

# Load input scaler
# The detection loop passes every feature row through input_scaler.transform
# before handing it to the model.
with open("./model/input_scaler.pkl", "rb") as f2:
    input_scaler = pickle.load(f2)

# Transform prediction into class
def get_class(prediction: float) -> str:
    '''
    Translate a numeric prediction (0, 1 or 2) into its class code
    ("C", "H" or "L"). Any other value yields None.
    '''
    code_by_prediction = {0: "C", 1: "H", 2: "L"}
    if prediction in code_by_prediction:
        return code_by_prediction[prediction]
    return None


In [None]:
# Run the scikit-learn model on every frame of VIDEO_TEST and show the result
# in an OpenCV window. Press Q in the window to stop early.
cap = cv2.VideoCapture(VIDEO_TEST)
current_stage = ""
prediction_probability_threshold = 0.6

# Text shown on screen for each class code returned by get_class
stage_by_class = {"C": "Correct", "L": "Low back", "H": "High back"}

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, image = cap.read()

            if not ret:
                break

            # Reduce size of a frame
            image = rescale_frame(image, 50)
            # image = cv2.flip(image, 1)

            # MediaPipe expects RGB input; `image` itself stays in BGR so it
            # can be drawn on and displayed by OpenCV without converting back.
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image_rgb.flags.writeable = False

            results = pose.process(image_rgb)

            if not results.pose_landmarks:
                # Still fall through to imshow/waitKey below so the window keeps
                # refreshing and the Q key is polled on frames without a person.
                print("No human found")
            else:
                # Draw landmarks and connections
                mp_drawing.draw_landmarks(
                    image,
                    results.pose_landmarks,
                    mp_pose.POSE_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(244, 117, 66), thickness=2, circle_radius=2),
                    mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=1),
                )

                # Make detection
                try:
                    # Extract keypoints from frame for the input
                    row = extract_important_keypoints(results)
                    X = pd.DataFrame([row], columns=HEADERS[1:])
                    X = pd.DataFrame(input_scaler.transform(X))

                    # Make prediction and its probability
                    predicted_class = get_class(sklearn_model.predict(X)[0])
                    prediction_probability = sklearn_model.predict_proba(X)[0]
                    confidence = prediction_probability.max()

                    # Accept the prediction only when the model is confident
                    # enough; unknown classes and weak predictions show "unk".
                    if confidence >= prediction_probability_threshold:
                        current_stage = stage_by_class.get(predicted_class, "unk")
                    else:
                        current_stage = "unk"

                    # Visualization
                    # Status box
                    cv2.rectangle(image, (0, 0), (250, 60), (245, 117, 16), -1)

                    # Display class
                    cv2.putText(image, "CLASS", (95, 12), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, current_stage, (90, 40), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                    # Display probability
                    cv2.putText(image, "PROB", (15, 12), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, str(round(confidence, 2)), (10, 40), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                except Exception as e:
                    print(f"Error: {e}")

            cv2.imshow("CV2", image)

            # Press Q to close cv2 window
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the video handle and close the window, even if a frame raised
    cap.release()
    cv2.destroyAllWindows()

    # Pump the GUI event loop a few more times after destroyAllWindows
    # (presumably so the window actually closes on some platforms — confirm)
    for _ in range(4):
        cv2.waitKey(1)
  

## 2. Make detection in real time


Prediction using the scikit-learn model in real time through a webcam.


In [2]:
import cv2
import mediapipe as mp
import pickle
import pandas as pd
import numpy as np

# Load model
# Paths are relative to the notebook's directory (same layout as the other
# cells) instead of an absolute, machine-specific location.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open("./model/LR_model.pkl", "rb") as f:
    sklearn_model = pickle.load(f)

# Load input scaler
with open("./model/input_scaler.pkl", "rb") as f2:
    input_scaler = pickle.load(f2)

# Initialize MediaPipe
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Transform prediction into class
def get_class(prediction: float) -> str:
    '''
    Return the class code for a numeric prediction:
    0 -> "C", 1 -> "H", 2 -> "L". Anything else gives None.
    '''
    lookup = {0: "C", 1: "H", 2: "L"}
    try:
        return lookup[prediction]
    except KeyError:
        return None

# Extract the model's input features from a MediaPipe pose result
def extract_important_keypoints(results):
    '''
    Return x, y, z and visibility for the 17 landmarks used as model input,
    as one flat list of 68 values.

    Only this subset is emitted (not every landmark in the pose result), so the
    row has the same width as the feature rows passed to input_scaler.transform
    elsewhere in this notebook. A wider row makes the scaler reject the input.

    Args:
        results: MediaPipe pose result whose pose_landmarks is not None.

    Returns:
        list of 68 numbers, four per landmark, in the order listed below.
    '''
    important_lms = [
        "NOSE", "LEFT_SHOULDER", "RIGHT_SHOULDER", "LEFT_ELBOW", "RIGHT_ELBOW",
        "LEFT_WRIST", "RIGHT_WRIST", "LEFT_HIP", "RIGHT_HIP", "LEFT_KNEE", "RIGHT_KNEE",
        "LEFT_ANKLE", "RIGHT_ANKLE", "LEFT_HEEL", "RIGHT_HEEL", "LEFT_FOOT_INDEX", "RIGHT_FOOT_INDEX",
    ]
    landmarks = results.pose_landmarks.landmark

    keypoints = []
    for lm in important_lms:
        landmark = landmarks[mp_pose.PoseLandmark[lm].value]
        keypoints.extend([landmark.x, landmark.y, landmark.z, landmark.visibility])
    return keypoints

# Initialize webcam
cap = cv2.VideoCapture(0)
current_stage = ""
prediction_probability_threshold = 0.6

# Text shown on screen for each class code returned by get_class
stage_by_class = {"C": "Correct", "L": "Low back", "H": "High back"}

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, image = cap.read()

            if not ret:
                print("Failed to grab frame")
                break

            # Resize frame if needed
            # image = rescale_frame(image, 50)  # Optional if you define rescale_frame

            # MediaPipe expects RGB input. `image` itself is left in BGR, so it
            # can be drawn on and displayed without converting back.
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image_rgb.flags.writeable = False

            results = pose.process(image_rgb)

            # Draw landmarks
            if results.pose_landmarks:
                mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

                try:
                    # Extract keypoints and scale input
                    row = extract_important_keypoints(results)
                    X = pd.DataFrame([row])
                    X = pd.DataFrame(input_scaler.transform(X))

                    # Make prediction
                    predicted_class = get_class(sklearn_model.predict(X)[0])
                    prediction_probability = sklearn_model.predict_proba(X)[0]
                    confidence = prediction_probability.max()

                    # Accept the prediction only when the model is confident
                    # enough; unknown classes and weak predictions show "unk".
                    if confidence >= prediction_probability_threshold:
                        current_stage = stage_by_class.get(predicted_class, "unk")
                    else:
                        current_stage = "unk"

                    # Visualization
                    cv2.rectangle(image, (0, 0), (250, 60), (245, 117, 16), -1)
                    cv2.putText(image, "CLASS", (95, 12), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, current_stage, (90, 40), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                    cv2.putText(image, "PROB", (15, 12), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
                    cv2.putText(image, str(round(confidence, 2)), (10, 40), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                except Exception as e:
                    print(f"Error: {e}")

            else:
                cv2.putText(image, "No human detected", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2, cv2.LINE_AA)

            cv2.imshow("Plank Form Correction - Webcam", image)

            # Press Q to stop
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame raised
    cap.release()
    cv2.destroyAllWindows()


ImportError: cannot import name 'float8_e4m3b11fnuz' from 'tensorflow.python.framework.dtypes' (c:\Users\Abdul Rehman\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\framework\dtypes.py)

### Real-time detection with a timer that tracks the seconds spent in correct and incorrect form

In [None]:
import cv2
import mediapipe as mp
import pickle
import pandas as pd
import numpy as np
from collections import deque
import time

# Unpickle the classifier first, then the scaler applied to its inputs
with open("model/LR_model.pkl", "rb") as model_file:
    sklearn_model = pickle.load(model_file)

with open("model/input_scaler.pkl", "rb") as scaler_file:
    input_scaler = pickle.load(scaler_file)

# MediaPipe aliases: drawing utilities and the pose solution
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# Class map
def get_class(pred):
    '''Map a numeric prediction (0, 1, 2) to "C", "H" or "L"; None otherwise.'''
    codes = {0: "C", 1: "H", 2: "L"}
    return codes[pred] if pred in codes else None

# Important landmark extractor (68 features)
def extract_important_keypoints(results):
    '''
    Build the flat feature list for one pose result: x, y, z and visibility of
    each of the 17 landmarks named below, in that order.
    '''
    pose_points = results.pose_landmarks.landmark
    landmark_names = (
        "NOSE",
        "LEFT_SHOULDER", "RIGHT_SHOULDER",
        "LEFT_ELBOW", "RIGHT_ELBOW",
        "LEFT_WRIST", "RIGHT_WRIST",
        "LEFT_HIP", "RIGHT_HIP",
        "LEFT_KNEE", "RIGHT_KNEE",
        "LEFT_ANKLE", "RIGHT_ANKLE",
        "LEFT_HEEL", "RIGHT_HEEL",
        "LEFT_FOOT_INDEX", "RIGHT_FOOT_INDEX",
    )

    features = []
    for name in landmark_names:
        point = pose_points[mp_pose.PoseLandmark[name].value]
        features += [point.x, point.y, point.z, point.visibility]
    return features

# Initialize webcam
cap = cv2.VideoCapture(0)

# Settings
prediction_threshold = 0.6   # minimum top-class probability for a prediction to count
lineancy_buffer_size = 5     # number of recent accepted predictions used for the majority vote
# Recent accepted predictions; the vote over this buffer avoids quick changes in status
prediction_history = deque(maxlen=lineancy_buffer_size)
last_status = None                           # class code currently shown ("C", "H", "L") or None
correct_start = incorrect_start = None       # wall-clock start of the running interval, if any
correct_time = 0                             # seconds banked from finished correct intervals
incorrect_time = 0                           # seconds banked from finished incorrect intervals

try:
    # Pose detection
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Camera error.")
                break

            # MediaPipe expects RGB input. `image` is the BGR frame itself, so
            # no conversion back is needed before drawing and display.
            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image_rgb.flags.writeable = False
            results = pose.process(image_rgb)
            image = frame

            if results.pose_landmarks:
                # mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

                try:
                    row = extract_important_keypoints(results)
                    X = pd.DataFrame([row])
                    X_scaled = pd.DataFrame(input_scaler.transform(X))

                    pred = sklearn_model.predict(X_scaled)[0]
                    prob = sklearn_model.predict_proba(X_scaled)[0]
                    label = get_class(pred)

                    # Only confident predictions of a known class enter the buffer
                    if label is not None and prob.max() >= prediction_threshold:
                        prediction_history.append(label)

                    # Apply buffer logic
                    if len(prediction_history) == lineancy_buffer_size:
                        # Iterating the deque (not a set) makes ties resolve to
                        # the earliest label in the buffer, the same way every run.
                        majority = max(prediction_history, key=prediction_history.count)

                        # Only update if status changes
                        if majority != last_status:
                            now = time.time()
                            last_status = majority
                            if majority == "C":
                                # Incorrect -> correct: bank the incorrect interval
                                if incorrect_start is not None:
                                    incorrect_time += now - incorrect_start
                                incorrect_start = None
                                correct_start = now
                            else:
                                # Correct -> incorrect: bank the correct interval
                                if correct_start is not None:
                                    correct_time += now - correct_start
                                correct_start = None
                                # A switch between "H" and "L" is still incorrect
                                # form: keep the running interval instead of
                                # restarting it, otherwise its seconds are lost.
                                if incorrect_start is None:
                                    incorrect_start = now

                    # Live timer update: banked seconds plus the running interval
                    if last_status == "C" and correct_start is not None:
                        correct_total = correct_time + (time.time() - correct_start)
                    else:
                        correct_total = correct_time

                    if last_status in ["H", "L"] and incorrect_start is not None:
                        incorrect_total = incorrect_time + (time.time() - incorrect_start)
                    else:
                        incorrect_total = incorrect_time

                    # Overlay on frame
                    cv2.rectangle(image, (0, 0), (350, 100), (0, 0, 0), -1)
                    cv2.putText(image, f"Form: {last_status or 'Analyzing...'}", (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
                    cv2.putText(image, f"Correct Time: {int(correct_total)}s", (10, 60),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
                    cv2.putText(image, f"Incorrect Time: {int(incorrect_total)}s", (10, 90),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 100, 255), 2)

                except Exception as e:
                    print("Error during prediction:", e)

            else:
                cv2.putText(image, "No human detected", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            cv2.imshow("Plank Form Detection with Timer", image)

            # Press Q to stop
            if cv2.waitKey(10) & 0xFF == ord("q"):
                break
finally:
    # Always free the camera and close the window, even if a frame raised
    cap.release()
    cv2.destroyAllWindows()
