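Kurze Projektbeschreibung: Dieses Notebook extrahiert mit einem YOLO-Pose-Modell die Keypoints aus Einzelvideos von Liegestützen, Kniebeugen und Klimmzügen, reduziert jedes Video auf sechs gleichmäßig verteilte Frames und trainiert darauf einen Random-Forest-Klassifikator zur Erkennung der Übung.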


In [1]:
import cv2
from ultralytics import YOLO
from PIL import Image
import numpy as np
import os
import csv
import pandas as pd


In [2]:
# Load YOLO pose model
model_pose = YOLO('yolo11n-pose.pt')


# Training exercise -> (folder with the single-repetition videos, folder for the keypoint CSVs).
# Switch exercises by changing EXERCISE below instead of commenting path pairs in and out.
EXERCISE_FOLDERS = {
    "pushups": ("videos/single_pushup_videos/", "keyjoints/pushups/"),
    "squats": ("videos/single_squat_videos/", "keyjoints/squats/"),
    "pullups": ("videos/single_pullup_videos/", "keyjoints/pullups/"),
}

EXERCISE = "pullups"  # one of the EXERCISE_FOLDERS keys

input_folder, output_folder = EXERCISE_FOLDERS[EXERCISE]

# The keypoint CSVs are written here; create the folder up front.
os.makedirs(output_folder, exist_ok=True)


Aus dem Input-Ordner werden die Einzelvideos geladen; für jedes Video werden die Keypoints aller Frames in einer eigenen CSV-Datei gespeichert.

In [4]:
# Extract pose keypoints from every video in the input folder (one CSV per video).
NUM_KEYPOINTS = 17        # columns kp_0 ... kp_16 are written for every frame
MIN_KEYPOINT_CONF = 0.2   # x/y of keypoints below this confidence are stored as NaN


def _keypoint_row(result):
    """Return ``(flattened_xy, confidences)`` for the first detection in ``result``.

    ``flattened_xy`` is a list of ``2 * NUM_KEYPOINTS`` floats (x0, y0, x1, y1, ...)
    and ``confidences`` a list of ``NUM_KEYPOINTS`` floats. Both are always plain
    Python lists of fixed length so that every CSV row has the same width.
    Entries stay NaN when the keypoint data cannot be read or, for x/y, when the
    keypoint confidence is below ``MIN_KEYPOINT_CONF``.
    """
    flattened_xy = [np.nan] * (NUM_KEYPOINTS * 2)
    confidences = [np.nan] * NUM_KEYPOINTS

    try:
        xy = result.keypoints.xy[0].cpu().numpy()
        conf = result.keypoints.conf[0].cpu().numpy()
    except (AttributeError, IndexError, TypeError):
        # Keypoint data missing or empty (presumably: no person detected in this
        # frame) -> keep the all-NaN row instead of aborting the whole video.
        return flattened_xy, confidences

    for i in range(min(NUM_KEYPOINTS, len(conf), len(xy))):
        confidences[i] = float(conf[i])
        if conf[i] < MIN_KEYPOINT_CONF:
            continue  # low-confidence keypoint: keep the confidence, drop the position
        flattened_xy[2 * i] = float(xy[i][0])
        flattened_xy[2 * i + 1] = float(xy[i][1])

    return flattened_xy, confidences


# The CSV layout is identical for all videos, so build the header once.
xy_headers = [f"kp_{i}_{coord}" for i in range(NUM_KEYPOINTS) for coord in ("x", "y")]
conf_headers = [f"kp_{i}_conf" for i in range(NUM_KEYPOINTS)]
columns = ["frame", "time_sec"] + xy_headers + conf_headers

# Sorted for a reproducible processing order; add [:3] to process only the first three videos.
video_files = sorted(f for f in os.listdir(input_folder) if f.lower().endswith(('.mp4', '.mov')))

for video_file in video_files:
    output_csv_path = os.path.join(output_folder, os.path.splitext(video_file)[0] + ".csv")
    if os.path.exists(output_csv_path):
        print(f"⏭️ Skipping {video_file} (CSV already exists)")
        continue

    video_path = os.path.join(input_folder, video_file)
    cap = cv2.VideoCapture(video_path)
    output_rows = []

    try:
        if not cap.isOpened():
            print(f"⚠️ Could not open {video_file} — skipped.")
            continue

        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_idx = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # A reported frame rate of 0 would make the division fail;
            # in that case the timestamp is unknown.
            timestamp = frame_idx / fps if fps > 0 else np.nan
            results = model_pose.predict(source=frame, save=False, conf=0.25, verbose=False)

            for result in results:
                flattened_xy, confidences = _keypoint_row(result)
                output_rows.append([frame_idx, timestamp] + flattened_xy + confidences)

            frame_idx += 1
    finally:
        # Release the capture even if prediction raises or the video is skipped.
        cap.release()

    if not output_rows:
        # Do not write a header-only CSV: it would make the "already exists"
        # check above skip this video on every later run.
        print(f"⚠️ No frames read from {video_file} — no CSV written.")
        continue

    # Save to CSV
    df_out = pd.DataFrame(output_rows, columns=columns)
    keypoint_cols = [col for col in df_out.columns if col.startswith("kp_")]
    # Exact zeros in keypoint columns are treated as missing values
    # (presumably the model's placeholder for undetected keypoints — TODO confirm).
    df_out[keypoint_cols] = df_out[keypoint_cols].replace(0.0, np.nan)

    df_out.to_csv(output_csv_path, index=False, na_rep="NaN")
    print(f"✅ Saved: {output_csv_path}")

Sampling

Iterate over the keypoint CSV files of all single-exercise videos and, for each one, write a sampled CSV file that keeps only six evenly spaced frames describing the exercise.

In [5]:
# Function to sample 6 evenly spaced frames from full CSV
def sample_csv(input_csv_path, output_csv_path, num_samples=6):
    """Write ``num_samples`` evenly spaced rows of a CSV file to a new CSV file.

    Args:
        input_csv_path: Path of the CSV file to read.
        output_csv_path: Path of the CSV file to write. If it already exists,
            nothing is read or written.
        num_samples: Number of rows to keep (default 6).

    Returns:
        None. A status line is printed in every case (skipped, too few rows, saved).
    """
    already_done = os.path.exists(output_csv_path)
    if already_done:
        print(f"⏭️ Skipping {output_csv_path} (already exists)")
        return

    all_rows = pd.read_csv(input_csv_path)
    total_frames = len(all_rows)

    if total_frames >= num_samples:
        # Evenly spaced positions from the first to the last row (truncated to int).
        positions = np.linspace(0, total_frames - 1, num_samples, dtype=int)
        all_rows.iloc[positions].to_csv(output_csv_path, index=False)
        print(f"✅ Saved: {output_csv_path}")
    else:
        print(f"⚠️ Not enough frames in {input_csv_path} ({total_frames} < {num_samples}) — skipped.")

# Input and output folders based on the project structure
base_input_dir = "keyjoints"
base_output_dir = "keyjoints_sampled"
exercise_types = ["pushups", "squats", "pullups"]

# Sample the keypoint CSVs of every exercise type.
# The per-exercise folders get their own names so that the `input_folder` /
# `output_folder` settings of the video-extraction step are not overwritten
# when this cell runs.
for exercise in exercise_types:
    exercise_input_dir = os.path.join(base_input_dir, exercise)
    if not os.path.isdir(exercise_input_dir):
        # Keypoints for this exercise have not been extracted yet; skip it
        # instead of failing in os.listdir.
        print(f"❌ Folder not found: {exercise_input_dir}")
        continue

    exercise_output_dir = os.path.join(base_output_dir, f"{exercise}_sampled")
    os.makedirs(exercise_output_dir, exist_ok=True)

    # Sorted for a reproducible processing (and log) order.
    for csv_file in sorted(os.listdir(exercise_input_dir)):
        if not csv_file.endswith(".csv"):
            continue
        input_csv = os.path.join(exercise_input_dir, csv_file)
        output_csv = os.path.join(exercise_output_dir, csv_file)
        sample_csv(input_csv, output_csv)


⏭️ Skipping keyjoints_sampled/pushups_sampled/single_pushup_fabi2_2.csv (already exists)
⏭️ Skipping keyjoints_sampled/pushups_sampled/single_pushup_fabi2_3.csv (already exists)
⏭️ Skipping keyjoints_sampled/pushups_sampled/single_pushup_fabi2_1.csv (already exists)
⏭️ Skipping keyjoints_sampled/pushups_sampled/single_pushup_fabi2_4.csv (already exists)
⏭️ Skipping keyjoints_sampled/pushups_sampled/single_pushup_fabi2_5.csv (already exists)
⏭️ Skipping keyjoints_sampled/pushups_sampled/single_pushup_fabi2_7.csv (already exists)
⏭️ Skipping keyjoints_sampled/pushups_sampled/single_pushup_fabi2_6.csv (already exists)
⏭️ Skipping keyjoints_sampled/pushups_sampled/single_pushup_fabi1_3.csv (already exists)
⏭️ Skipping keyjoints_sampled/pushups_sampled/single_pushup_fabi1_2.csv (already exists)
⏭️ Skipping keyjoints_sampled/pushups_sampled/single_pushup_fabi1_1.csv (already exists)
⏭️ Skipping keyjoints_sampled/pushups_sampled/single_pushup_fabi1_5.csv (already exists)
⏭️ Skipping keyjoints

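Aus den Samples wird ein Feature-Datensatz erstellt: Pro Video werden die Keypoint-Werte (x, y und Konfidenz) der sechs Frames zu einem Vektor zusammengefasst und mit dem Label der jeweiligen Übung versehen.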

In [6]:
from sklearn.impute import SimpleImputer

# Input base folder
base_input_dir = "keyjoints_sampled"
# Sampled-CSV folder name -> integer class label
exercise_labels = {
    "pushups_sampled": 0,
    "squats_sampled": 1,
    "pullups_sampled": 2
}

X = []  # one flattened keypoint vector per sampled CSV (may still contain NaN)
y = []  # class label of the corresponding vector
expected_length = None  # vector length of the first file; all others must match

# Loop over each label type (exercise class)
for folder_name, label in exercise_labels.items():
    folder_path = os.path.join(base_input_dir, folder_name)
    if not os.path.exists(folder_path):
        print(f"❌ Folder not found: {folder_path}")
        continue

    # Sorted so the row order of the dataset (and therefore any seeded
    # train/test split of it) is reproducible.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(".csv"):
            continue
        file_path = os.path.join(folder_path, file)
        df = pd.read_csv(file_path)

        # Drop non-keypoint columns
        keypoint_cols = [col for col in df.columns if col.startswith("kp_")]

        # Flatten to one vector (6 frames × 51 features = 306 values)
        flat = df[keypoint_cols].to_numpy(dtype=float).flatten()

        # Every sample must have the same number of features, otherwise the
        # columns of the feature matrix would not line up.
        if expected_length is None:
            expected_length = flat.size
        elif flat.size != expected_length:
            print(f"⚠️ Skipping {file_path}: {flat.size} values, expected {expected_length}")
            continue

        X.append(flat)
        y.append(label)

print(f"✅ Total samples processed: {len(X)}")

X_df = pd.DataFrame(X)

# Impute missing values with the column mean over ALL samples.
# Fitting the imputer on a single 1-row sample has no mean to use: every NaN
# makes its column all-missing, and SimpleImputer drops such columns, which
# shortens the vector and shifts the remaining features.
# keep_empty_features=True (scikit-learn >= 1.2) keeps columns that are NaN in
# every sample (filled with 0), so the feature count stays fixed.
# NOTE: the means are computed on the full dataset, i.e. before any train/test split.
if not X_df.empty:
    imputer = SimpleImputer(strategy="mean", keep_empty_features=True)
    X_df = pd.DataFrame(imputer.fit_transform(X_df))

# Optional: save as a dataset CSV
X_df["label"] = y
X_df.to_csv("feature_dataset.csv", index=False)
print("✅ Saved feature matrix to: feature_dataset.csv")




✅ Total samples processed: 74
✅ Saved feature matrix to: feature_dataset.csv


Train an exercise-recognition model with scikit-learn using the previously generated feature dataset.

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Load the feature dataset (one row per video, last column "label")
df = pd.read_csv("feature_dataset.csv")

# Split into features and labels
X = df.drop(columns=["label"])
y = df["label"]

# Train/test split (80% training, 20% test).
# Stratify so every exercise class keeps its share in both parts; with a
# plain random split on this small dataset a class can be missing from the
# test set and would then never be evaluated.
try:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
except ValueError as err:
    # Stratification is impossible, e.g. when a class has only one sample.
    print(f"⚠️ Stratified split not possible ({err}); using a plain random split.")
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

# Train Random Forest
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate against all classes present in the dataset, so a class that is
# absent from the test set shows up with support 0 instead of disappearing.
class_labels = sorted(y.unique())
y_pred = clf.predict(X_test)
print("✅ Classification report:\n", classification_report(y_test, y_pred, labels=class_labels, zero_division=0))
print("🧩 Confusion matrix:\n", confusion_matrix(y_test, y_pred, labels=class_labels))

# Optional: Save model
joblib.dump(clf, "exercise_classifier.pkl")
print("💾 Model saved as: exercise_classifier.pkl")


✅ Classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00         7

    accuracy                           1.00        15
   macro avg       1.00      1.00      1.00        15
weighted avg       1.00      1.00      1.00        15

🧩 Confusion matrix:
 [[8 0]
 [0 7]]
💾 Model saved as: exercise_classifier.pkl
