In [2]:
# Complete Exercise Prediction Code for Google Colab
# Run this entire cell to train model and predict on new data

# Step 1: Install required packages
print("Installing required packages...")
import subprocess
import sys

def install_packages():
    packages = ['catboost', 'lightgbm', 'xgboost', 'imbalanced-learn']
    for package in packages:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])

try:
    install_packages()
    print("✅ Packages installed successfully!")
except Exception as e:
    print(f"❌ Error installing packages: {e}")

# Step 2: Import all necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
import joblib
import warnings
warnings.filterwarnings('ignore')

print("✅ Libraries imported successfully!")

# Step 3: Load training data
print("\n" + "="*50)
print("STEP 1: LOAD TRAINING DATA")
print("="*50)

# Load training data from local file
training_file = 'exercise_angles.csv'
print(f"Loading training dataset: {training_file}")

try:
    df = pd.read_csv(training_file)
    print(f"✅ Training file loaded: {training_file}")
except FileNotFoundError:
    print(f"❌ File not found: {training_file}")
    print("Please make sure 'exercise_angles.csv' is in the same directory as this notebook.")
    raise

print(f"✅ Dataset loaded with shape: {df.shape}")
print(f"✅ Columns: {list(df.columns)}")
print(f"✅ Unique labels: {df['Label'].unique()}")

# Step 4: Data preprocessing
print("\n" + "="*50)
print("STEP 2: DATA PREPROCESSING")
print("="*50)

# Create label encoders
side_encoder = LabelEncoder()
label_encoder = LabelEncoder()

# Encode categorical columns
df['Side_encoded'] = side_encoder.fit_transform(df['Side'])
df['Label_encoded'] = label_encoder.fit_transform(df['Label'])

# Prepare features and target
feature_columns = ['Side_encoded', 'Shoulder_Angle', 'Elbow_Angle', 'Hip_Angle', 'Knee_Angle',
                  'Ankle_Angle', 'Shoulder_Ground_Angle', 'Elbow_Ground_Angle',
                  'Hip_Ground_Angle', 'Knee_Ground_Angle', 'Ankle_Ground_Angle']

X = df[feature_columns]
y = df['Label_encoded']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Apply SMOTE for class balancing
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

print(f"✅ Original training size: {X_train.shape[0]}")
print(f"✅ After SMOTE training size: {X_train_res.shape[0]}")
print(f"✅ Test size: {X_test.shape[0]}")

# Step 5: Model training
print("\n" + "="*50)
print("STEP 3: MODEL TRAINING")
print("="*50)

# Train Random Forest
print("Training Random Forest...")
rf_model = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1)
rf_model.fit(X_train_res, y_train_res)

# Train XGBoost
print("Training XGBoost...")
xgb_model = XGBClassifier(random_state=42, eval_metric='mlogloss')
xgb_model.fit(X_train_res, y_train_res)

# Train LightGBM
print("Training LightGBM...")
lgb_model = LGBMClassifier(random_state=42, verbose=-1)
lgb_model.fit(X_train_res, y_train_res)

# Create ensemble model
print("Creating ensemble model...")
ensemble_model = VotingClassifier(
    estimators=[
        ('rf', rf_model),
        ('xgb', xgb_model),
        ('lgb', lgb_model)
    ],
    voting='soft',
    n_jobs=-1
)
ensemble_model.fit(X_train_res, y_train_res)

print("✅ All models trained successfully!")

# Step 6: Model evaluation
print("\n" + "="*50)
print("STEP 4: MODEL EVALUATION")
print("="*50)

models = {
    'Random Forest': rf_model,
    'XGBoost': xgb_model,
    'LightGBM': lgb_model,
    'Ensemble': ensemble_model
}

best_model = None
best_accuracy = 0
best_model_name = ""

for name, model in models.items():
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy:.4f}")

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model
        best_model_name = name

print(f"\n🏆 Best Model: {best_model_name} with accuracy: {best_accuracy:.4f}")

# Detailed classification report for best model
print(f"\nDetailed Classification Report for {best_model_name}:")
y_pred_best = best_model.predict(X_test)
print(classification_report(y_test, y_pred_best, target_names=label_encoder.classes_))

# Step 7: Save models and encoders
print("\n" + "="*50)
print("STEP 5: SAVING MODELS")
print("="*50)

# Save the best model and encoders
joblib.dump(best_model, 'best_exercise_model.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')
joblib.dump(side_encoder, 'side_encoder.pkl')

print("✅ Models and encoders saved successfully!")
print("Files saved:")
print("- best_exercise_model.pkl")
print("- label_encoder.pkl")
print("- side_encoder.pkl")

# Step 8: Upload new data for prediction
print("\n" + "="*50)
print("STEP 6: UPLOAD NEW DATA FOR PREDICTION")
print("="*50)

try:
    # Load new data from local file
    new_data_file = 'new_exercise_data.csv'  # Change this to your actual file name
    print(f"Loading new data file for prediction: {new_data_file}")

    # Load new data
    new_data = pd.read_csv(new_data_file)
    print(f"✅ New data loaded with shape: {new_data.shape}")

    # Step 9: Make predictions
    print("\n" + "="*50)
    print("STEP 7: MAKING PREDICTIONS")
    print("="*50)

    # Prepare new data
    new_data_copy = new_data.copy()

    # Encode Side column
    new_data_copy['Side_encoded'] = side_encoder.transform(new_data_copy['Side'])

    # Prepare features for prediction
    X_new = new_data_copy[feature_columns]

    # Make predictions
    y_pred_numeric = best_model.predict(X_new)

    # Convert predictions back to original labels
    predicted_labels = label_encoder.inverse_transform(y_pred_numeric)

    # Add predictions to the original data
    result_data = new_data.copy()
    result_data['Predicted_Label'] = predicted_labels

    # Display results
    print("✅ Predictions completed!")
    print(f"Total predictions made: {len(predicted_labels)}")

    print("\nFirst 10 predictions:")
    display_cols = ['Side', 'Shoulder_Angle', 'Elbow_Angle', 'Hip_Angle', 'Predicted_Label']
    available_cols = [col for col in display_cols if col in result_data.columns]
    print(result_data[available_cols].head(10))

    print("\nPrediction distribution:")
    prediction_counts = pd.Series(predicted_labels).value_counts()
    for label, count in prediction_counts.items():
        percentage = (count / len(predicted_labels)) * 100
        print(f"{label}: {count} ({percentage:.1f}%)")

    # Save results
    result_data.to_csv('exercise_predictions_results.csv', index=False)
    print("\n✅ Results saved to 'exercise_predictions_results.csv'")

    # Download the results
    print("\nDownloading results file...")
    files.download('exercise_predictions_results.csv')

    # Step 10: Summary
    print("\n" + "="*50)
    print("SUMMARY")
    print("="*50)
    print(f"✅ Best Model: {best_model_name}")
    print(f"✅ Model Accuracy: {best_accuracy:.4f}")
    print(f"✅ Total Predictions: {len(predicted_labels)}")
    print(f"✅ Unique Exercises Predicted: {len(prediction_counts)}")
    print("✅ Results downloaded successfully!")

    print("\nExercise Types Found:")
    for exercise in prediction_counts.index:
        print(f"- {exercise}")

except Exception as e:
    print(f"❌ Error in prediction phase: {e}")
    print("You can still use the trained model later by loading the saved files.")

    # Show how to use the model later
    print("\n" + "="*50)
    print("HOW TO USE THE SAVED MODEL LATER")
    print("="*50)
    print("""
To use the saved model later, run this code:

import joblib
import pandas as pd

# Load saved components
model = joblib.load('best_exercise_model.pkl')
label_encoder = joblib.load('label_encoder.pkl')
side_encoder = joblib.load('side_encoder.pkl')

# Load your new data
new_data = pd.read_csv('your_new_data.csv')

# Prepare features
new_data['Side_encoded'] = side_encoder.transform(new_data['Side'])
feature_columns = ['Side_encoded', 'Shoulder_Angle', 'Elbow_Angle', 'Hip_Angle', 'Knee_Angle',
                  'Ankle_Angle', 'Shoulder_Ground_Angle', 'Elbow_Ground_Angle',
                  'Hip_Ground_Angle', 'Knee_Ground_Angle', 'Ankle_Ground_Angle']
X_new = new_data[feature_columns]

# Make predictions
predictions = model.predict(X_new)
predicted_labels = label_encoder.inverse_transform(predictions)

# Add to results
new_data['Predicted_Label'] = predicted_labels
    """)

print("\n🎉 Process completed successfully!")

Installing required packages...


✅ Packages installed successfully!
✅ Libraries imported successfully!

STEP 1: LOAD TRAINING DATA
Loading training dataset: exercise_angles.csv
✅ Training file loaded: exercise_angles.csv
✅ Dataset loaded with shape: (31033, 12)
✅ Columns: ['Side', 'Shoulder_Angle', 'Elbow_Angle', 'Hip_Angle', 'Knee_Angle', 'Ankle_Angle', 'Shoulder_Ground_Angle', 'Elbow_Ground_Angle', 'Hip_Ground_Angle', 'Knee_Ground_Angle', 'Ankle_Ground_Angle', 'Label']
✅ Unique labels: ['Jumping Jacks' 'Squats' 'Push Ups' 'Pull ups' 'Russian twists']

STEP 2: DATA PREPROCESSING
✅ Original training size: 21723
✅ After SMOTE training size: 34175
✅ Test size: 9310

STEP 3: MODEL TRAINING
Training Random Forest...
Training XGBoost...
Training LightGBM...
Creating ensemble model...
✅ All models trained successfully!

STEP 4: MODEL EVALUATION
Random Forest Accuracy: 0.9668
XGBoost Accuracy: 0.9660
LightGBM Accuracy: 0.9619
Ensemble Accuracy: 0.9662

🏆 Best Model: Random Forest with accuracy: 0.9668

Detailed Classificatio