In [2]:
import os
import numpy as np
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Pose landmarks whose values are used as input features for plank detection.
IMPORTANT_LMS = [
    "NOSE",
    "LEFT_SHOULDER", "RIGHT_SHOULDER",
    "LEFT_ELBOW", "RIGHT_ELBOW",
    "LEFT_WRIST", "RIGHT_WRIST",
    "LEFT_HIP", "RIGHT_HIP",
    "LEFT_KNEE", "RIGHT_KNEE",
    "LEFT_ANKLE", "RIGHT_ANKLE",
    "LEFT_HEEL", "RIGHT_HEEL",
    "LEFT_FOOT_INDEX", "RIGHT_FOOT_INDEX",
]

# Column layout: "label" first, then four columns per landmark
# (<landmark>_x, _y, _z, _v), following the order of IMPORTANT_LMS.
feature_columns = ["label"] + [
    f"{landmark.lower()}_{suffix}"
    for landmark in IMPORTANT_LMS
    for suffix in ("x", "y", "z", "v")
]

# Create the directory that receives the fitted scaler and model files
os.makedirs("model", exist_ok=True)

# Load datasets.
# A missing file is reported and then re-raised instead of calling exit():
# exit() is a helper for the interactive interpreter and does not produce a
# normal, catchable error when used inside a notebook or script. The re-raised
# FileNotFoundError also names the file that could not be opened.
try:
    train_df = pd.read_csv("train.csv")
    test_df = pd.read_csv("test.csv")
except FileNotFoundError:
    print("Error: train.csv or test.csv not found!")
    raise

# Check for missing columns in BOTH splits. Both frames are indexed by "label"
# and fed to the same scaler later, so a malformed test.csv must fail here
# rather than further down.
for split_name, split_df in (("train.csv", train_df), ("test.csv", test_df)):
    missing_cols = [col for col in feature_columns if col not in split_df.columns]
    if missing_cols:
        print(f"Error: Missing columns in dataset: {missing_cols}")
        raise ValueError(f"{split_name} is missing required columns: {missing_cols}")

# Prepare features and labels.
# The inputs are selected explicitly from feature_columns (whose first entry is
# "label") instead of "every column except label": an extra column in a CSV is
# then never picked up as a feature, and train and test are guaranteed to
# present the same columns in the same order to the scaler and the models.
input_columns = feature_columns[1:]
X_train = train_df[input_columns]
y_train = train_df["label"]
X_test = test_df[input_columns]
y_test = test_df["label"]

# Scale the data: statistics are fitted on the training split only and the
# same fitted transform is applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Persist the fitted scaler next to the model so the same scaling can be reused
joblib.dump(scaler, "model/plank_input_scaler.pkl")

# Candidate classifiers, keyed by the display name used in the evaluation output.
# Estimators that draw random numbers are given a fixed seed so that re-running
# the notebook reproduces the same fitted models. SVC is seeded as well because
# probability=True makes it shuffle data internally to build its probability
# estimates; without a seed the saved model's predict_proba output could differ
# between runs.
models = {
    "Logistic Regression": LogisticRegression(C=1.0, max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=200, max_depth=15, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=150, learning_rate=0.05, max_depth=7, random_state=42),
    "SVM": SVC(C=2.0, kernel='rbf', probability=True, random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=7, weights='distance')
}

# Bookkeeping for the winning candidate.
# The winner is chosen by mean cross-validation accuracy on the training data.
# The test split is used only to REPORT accuracy: picking the winner on the
# test split would make the reported test accuracy optimistically biased.
best_model = None
best_model_name = None
best_cv_score = float("-inf")
best_accuracy = 0  # test accuracy of the selected model

# Train and evaluate models
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    # 5-fold cross-validation on the (already scaled) training data
    cv_scores = cross_val_score(model, X_train_scaled, y_train, cv=5)
    mean_cv_score = np.mean(cv_scores)
    y_pred = model.predict(X_test_scaled)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} -> CV Accuracy: {mean_cv_score:.4f}, Test Accuracy: {test_accuracy:.4f}")
    # A NaN score compares False here, so a failed candidate is never selected
    if mean_cv_score > best_cv_score:
        best_cv_score = mean_cv_score
        best_accuracy = test_accuracy
        best_model = model
        best_model_name = name

# Refuse to write a useless artifact if nothing was selected
if best_model is None:
    raise RuntimeError("No model was selected: the model set is empty or no valid CV score was produced")

# Save the best model (by path, the same way the scaler is saved)
joblib.dump(best_model, "model/plank_model.pkl")
print(f"Best model ({best_model_name}) saved with accuracy: {best_accuracy:.4f}")

# Debug check: compare the width of the feature matrix with the column spec.
# feature_columns holds "label" plus 4 columns for each of the 17 landmarks,
# so the expected feature count is 17 * 4 = 68.
n_expected_features = len(feature_columns) - 1  # exclude the "label" entry
print("X_train shape:", X_train.shape)  # second dimension should equal n_expected_features
print("Number of features expected:", n_expected_features)

Logistic Regression -> CV Accuracy: 0.9928, Test Accuracy: 0.9958
Random Forest -> CV Accuracy: 0.7704, Test Accuracy: 0.9056
Gradient Boosting -> CV Accuracy: 0.8268, Test Accuracy: 0.9155
SVM -> CV Accuracy: 0.8981, Test Accuracy: 0.9873
KNN -> CV Accuracy: 0.6891, Test Accuracy: 0.9507
Best model (Logistic Regression) saved with accuracy: 0.9958
X_train shape: (28520, 68)
Number of features expected: 68


In [7]:
# Save as check_scaler.py
# Sanity check: reload the persisted input scaler and confirm that the loaded
# object exposes a `transform` method.
from pathlib import Path

import joblib

# Relative location, matching where the training cell writes the scaler
# ("model/plank_input_scaler.pkl"), instead of a machine-specific absolute path.
# Run this from the directory that contains the `model/` folder.
SCALER_PATH = Path("model") / "plank_input_scaler.pkl"

# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code -- only load artifacts that come from a trusted source.
scaler = joblib.load(SCALER_PATH)
print("Type of scaler:", type(scaler))
print("Has 'transform' method:", hasattr(scaler, 'transform'))

Type of scaler: <class 'sklearn.preprocessing._data.StandardScaler'>
Has 'transform' method: True
