In [15]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import zipfile

In [17]:
# Define dataset path
# NOTE: these are absolute, machine-specific locations; edit them to match
# the environment the notebook is run in.
zip_path = "C:\\Users\\Ananya\\Downloads\\sem 3\\Action learning\\files_to_streamlit\\archive.zip"
extract_dir = "C:\\Users\\Ananya\\Downloads\\sem 3\\Action learning\\dataset"

# Extract dataset
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

data_new_path = os.path.join(extract_dir, "data_new")

# os.listdir returns entries in arbitrary order and also lists plain files.
# Keep only directories (one per participant) and sort them so the order in
# which samples are accumulated -- and therefore the seeded train/test split
# computed from them -- is the same on every machine.
participants = sorted(
    entry
    for entry in os.listdir(data_new_path)
    if os.path.isdir(os.path.join(data_new_path, entry))
)

# Initialize lists to store features and labels
all_features = []
all_labels = []

In [19]:
# Feature extraction function
def extract_features(joint_data):
    """Compute per-frame summary statistics of joint coordinates.

    Args:
        joint_data: Array of shape ``(num_frames, num_joints, num_coords)``.

    Returns:
        Array of shape ``(num_frames, 6 * num_coords + 2)``. For each frame the
        columns are, in order: per-coordinate mean, standard deviation,
        minimum, maximum and range over the joints; the mean and standard
        deviation of every joint's Euclidean distance to the first joint
        (index 0) of that frame; and the per-coordinate skewness.
        An input with zero frames yields an empty array that still has the
        full number of columns, so it can be stacked with other results.

    Raises:
        ValueError: If ``joint_data`` is not 3-dimensional.
    """
    joint_data = np.asarray(joint_data)
    if joint_data.ndim != 3:
        raise ValueError(
            "joint_data must have shape (num_frames, num_joints, num_coords), "
            f"got {joint_data.shape}"
        )

    # All statistics reduce over the joint axis (axis=1), one row per frame.
    mean_positions = joint_data.mean(axis=1)
    std_positions = joint_data.std(axis=1)
    min_positions = joint_data.min(axis=1)
    max_positions = joint_data.max(axis=1)
    range_positions = max_positions - min_positions  # Range of joint positions

    # Distance of every joint to the first joint of the same frame.
    joint_distances = np.linalg.norm(joint_data - joint_data[:, :1, :], axis=2)
    mean_distance = joint_distances.mean(axis=1, keepdims=True)
    std_distance = joint_distances.std(axis=1, keepdims=True)

    # Third central moment over std**3; the epsilon avoids division by zero
    # when a coordinate is constant across joints.
    centered = joint_data - mean_positions[:, np.newaxis, :]
    skewness = (centered ** 3).mean(axis=1) / (std_positions ** 3 + 1e-8)

    return np.hstack([
        mean_positions,
        std_positions,
        min_positions,
        max_positions,
        range_positions,
        mean_distance,
        std_distance,
        skewness,
    ])

In [23]:
# Process participant data
def process_participant(participant, num_joints=25, num_coords=3):
    """Load every task recording of one participant and accumulate its data.

    For each entry of the participant's directory that contains both
    ``Joint_Positions.csv`` and ``Labels.csv``, the joint positions are
    reshaped to ``(num_frames, num_joints, num_coords)``, passed through
    ``extract_features`` and the resulting features and labels are appended to
    the module-level ``all_features`` and ``all_labels`` lists.

    Args:
        participant: Name of the participant's entry inside ``data_new_path``.
            Entries that are not directories are ignored.
        num_joints: Number of joints per frame (default 25).
        num_coords: Number of coordinates per joint (default 3).

    Returns:
        None. Results are appended to ``all_features`` / ``all_labels``.
    """
    participant_path = os.path.join(data_new_path, participant)
    if not os.path.isdir(participant_path):
        # Stray files next to the participant folders would make
        # os.listdir raise NotADirectoryError.
        return

    # os.listdir order is arbitrary; sort so samples are accumulated in a
    # stable order and downstream seeded splits are reproducible.
    for task in sorted(os.listdir(participant_path)):
        task_path = os.path.join(participant_path, task)
        joint_positions_file = os.path.join(task_path, "Joint_Positions.csv")
        labels_file = os.path.join(task_path, "Labels.csv")
        if not (os.path.exists(joint_positions_file) and os.path.exists(labels_file)):
            continue

        # NOTE(review): read_csv treats the first row as a header by default;
        # confirm both CSV files really start with a header row.
        joint_positions = pd.read_csv(joint_positions_file).to_numpy()
        labels = pd.read_csv(labels_file).to_numpy().flatten()
        num_frames = labels.shape[0]
        if num_frames == 0:
            # Nothing to learn from an unlabeled recording; skipping it also
            # keeps degenerate empty arrays out of the accumulators.
            continue

        # The number of labels defines the number of frames; any rows beyond
        # num_frames * num_joints are dropped before reshaping.
        # Presumably one CSV row per joint with num_coords columns -- TODO confirm.
        joint_positions = joint_positions[:num_frames * num_joints].reshape(
            num_frames, num_joints, num_coords
        )
        features = extract_features(joint_positions)
        all_features.append(features)
        all_labels.append(labels)

# Process all participants
# Empty the accumulators first so that re-running this cell does not append
# every recording a second time (they are otherwise only reset in the cell
# that defines them).
all_features.clear()
all_labels.clear()
for participant in participants:
    process_participant(participant)

In [25]:
# Stack the per-recording label vectors and feature matrices into one
# label array and one (n_samples, n_features) matrix
y = np.concatenate(all_labels)
X = np.vstack(all_features)

# Relabel as strings: frames whose raw label equals 1 become "Normal",
# every other value becomes "Abnormal"
y = np.where(
    y == 1,
    "Normal",
    "Abnormal",
)

# Hold out 20% of the frames for testing (seeded shuffle).
# NOTE(review): the split is done per frame, so frames of one recording can
# land in both train and test -- confirm this is intended, since it may
# inflate the reported score.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [27]:
# Fit a 100-tree random forest on the training split (seeded for repeatability);
# fit() returns the estimator itself, so it can be chained
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out split: overall accuracy first, then the per-class
# precision / recall / F1 table
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))


Accuracy: 0.9755304101838755
              precision    recall  f1-score   support

    Abnormal       0.96      0.94      0.95      3844
      Normal       0.98      0.99      0.98     10296

    accuracy                           0.98     14140
   macro avg       0.97      0.97      0.97     14140
weighted avg       0.98      0.98      0.98     14140



In [31]:
# Persist the fitted classifier with joblib; the file name has no directory
# part, so it is written to the current working directory
model_path = "stroke_rehab_final_pose_model.pkl"
joblib.dump(value=clf, filename=model_path)
print(f"Model saved as {model_path}")

Model saved as stroke_rehab_final_pose_model.pkl
