In [1]:
import os
import numpy as np
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Landmarks used as model inputs for bicep curl detection
IMPORTANT_LMS = [
    "NOSE",
    "LEFT_SHOULDER",
    "RIGHT_SHOULDER",
    "RIGHT_ELBOW",
    "LEFT_ELBOW",
    "RIGHT_WRIST",
    "LEFT_WRIST",
    "LEFT_HIP",
    "RIGHT_HIP",
]

# Column layout: "label" first, then <landmark>_x/_y/_z/_v for every landmark,
# in the order the landmarks are listed above
feature_columns = ["label"] + [
    f"{landmark.lower()}_{suffix}"
    for landmark in IMPORTANT_LMS
    for suffix in ("x", "y", "z", "v")
]

# Create the output directory for the saved scaler/model (no-op if it exists)
os.makedirs("model", exist_ok=True)

# Load datasets (assuming train.csv and test.csv are prepared)
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

# Check for missing columns in BOTH splits. The test split is pushed through
# the same scaler and models as the training split, so validating only
# train.csv would let a malformed test.csv fail later with a less helpful error.
for csv_name, frame in (("train.csv", train_df), ("test.csv", test_df)):
    missing_cols = [col for col in feature_columns if col not in frame.columns]
    if missing_cols:
        print(f"Error: Missing columns in dataset ({csv_name}): {missing_cols}")
        # Raise SystemExit directly instead of calling exit(): that helper is
        # added by the `site` module for interactive use and is not guaranteed
        # to be available in every interpreter. The exit status stays 1.
        raise SystemExit(1)

# Prepare features and labels.
# Select the feature columns by name rather than "everything except label":
# this keeps train and test in the same column order and prevents any extra
# column present in the CSVs (e.g. a saved index) from being fed to the models.
feature_names = [col for col in feature_columns if col != "label"]
X_train = train_df[feature_names]
y_train = train_df["label"]
X_test = test_df[feature_names]
y_test = test_df["label"]

# Scale the data: statistics are fitted on the training split only and then
# reused, unchanged, for the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Persist the fitted scaler alongside the model (presumably so inference code
# can apply identical scaling -- confirm against the consumer of this file)
joblib.dump(scaler, "model/bicep_curl_input_scaler.pkl")

# Candidate classifiers, keyed by the display name used in the report below.
# Every estimator that consumes randomness is seeded so reruns are comparable.
models = {
    "Logistic Regression": LogisticRegression(C=1.0, max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=200, max_depth=15, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=150, learning_rate=0.05, max_depth=7, random_state=42),
    # probability=True makes SVC run an internal, randomised cross-validation
    # to calibrate probabilities; seed it like the tree ensembles above.
    "SVM": SVC(C=2.0, kernel='rbf', probability=True, random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=7, weights='distance')
}

# Best candidate seen so far. The name is initialised together with the model
# so the summary print cannot hit an unbound variable, and the starting score
# is -inf so the first trained model is always recorded.
best_model = None
best_model_name = None
best_accuracy = float("-inf")

# Train and evaluate models
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    # 5-fold cross-validation on the training split, reported for reference only
    cv_scores = cross_val_score(model, X_train_scaled, y_train, cv=5)
    mean_cv_score = np.mean(cv_scores)
    y_pred = model.predict(X_test_scaled)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} -> CV Accuracy: {mean_cv_score:.4f}, Test Accuracy: {test_accuracy:.4f}")
    # NOTE(review): the winner is chosen by test-set accuracy, so the accuracy
    # reported for the saved model is optimistically biased. Consider selecting
    # on the CV score and keeping the test split as a final hold-out.
    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        best_model = model
        best_model_name = name

# Refuse to write a pickle containing None if nothing was trained
if best_model is None:
    raise RuntimeError("No model was trained; nothing to save.")

# Save the best model
joblib.dump(best_model, "model/bicep_curl_model.pkl")
print(f"Best model ({best_model_name}) saved with accuracy: {best_accuracy:.4f}")
print("X_train shape:", X_train.shape)
print("Number of features expected:", len(feature_columns) - 1)

Logistic Regression -> CV Accuracy: 0.8500, Test Accuracy: 0.7666
Random Forest -> CV Accuracy: 0.7857, Test Accuracy: 0.9652
Gradient Boosting -> CV Accuracy: 0.7784, Test Accuracy: 0.9040
SVM -> CV Accuracy: 0.8831, Test Accuracy: 0.9040
KNN -> CV Accuracy: 0.7728, Test Accuracy: 0.9752
Best model (KNN) saved with accuracy: 0.9752
X_train shape: (15372, 36)
Number of features expected: 36
