In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

# Load the CSV file (replace with your file path)
data = pd.read_csv(r'E:\jupyter notebook\MHI\MHI.csv')
X = data.iloc[:, :-2]  # Features: all columns except the last two
y = data.iloc[:, -1]   # Labels: last column (0 for non-fall, 1 for fall)

# Split data for test evaluation (unscaled, for feature selection)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
data.info()

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Corrected: Unpack all four values from train_test_split
X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Function to get top 50 features using Random Forest (using unscaled data)
def get_top_features(X, y, n_features=50):
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    rf.fit(X, y)
    feature_importance = pd.Series(rf.feature_importances_, index=X.columns)
    top_features = feature_importance.sort_values(ascending=False).head(n_features).index
    return X[top_features], top_features

# Get reduced feature set
X_top, top_features = get_top_features(X, y)
# Use scaled data with the selected top features
X_train_top = X_train_scaled[:, X.columns.get_indexer(top_features)]
X_test_top = X_test_scaled[:, X.columns.get_indexer(top_features)]


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70 entries, 0 to 69
Columns: 307202 entries, 0 to fall/nonfall
dtypes: int64(307202)
memory usage: 164.1 MB


In [2]:
# 1. Random Forest Classifier
def evaluate_random_forest(X_train, X_test, y_train, y_test):
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    cv_scores = cross_val_score(model, X_top, y, cv=5)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, zero_division=0)
    print("Random Forest Results:")
    print(f"CV Scores: {cv_scores}")
    print(f"Mean CV Score: {cv_scores.mean():.3f}")
    print(f"Std CV Score: {cv_scores.std():.3f}")
    print(f"Test Accuracy: {accuracy:.3f}")
    print(f"Classification Report:\n{report}\n")


In [3]:
# Run all classifiers
evaluate_random_forest(X_train_top, X_test_top, y_train, y_test)


Random Forest Results:
CV Scores: [0.92857143 1.         1.         0.78571429 1.        ]
Mean CV Score: 0.943
Std CV Score: 0.083
Test Accuracy: 0.857
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.83      0.83         6
           1       0.88      0.88      0.88         8

    accuracy                           0.86        14
   macro avg       0.85      0.85      0.85        14
weighted avg       0.86      0.86      0.86        14




In [4]:

# 2. Logistic Regression Classifier
def evaluate_logistic_regression(X_train, X_test, y_train, y_test):
    model = LogisticRegression(max_iter=1000, random_state=42)
    model.fit(X_train, y_train)
    cv_scores = cross_val_score(model, X_top, y, cv=5)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, zero_division=0)
    print("Logistic Regression Results:")
    print(f"CV Scores: {cv_scores}")
    print(f"Mean CV Score: {cv_scores.mean():.3f}")
    print(f"Std CV Score: {cv_scores.std():.3f}")
    print(f"Test Accuracy: {accuracy:.3f}")
    print(f"Classification Report:\n{report}\n")


In [5]:
evaluate_logistic_regression(X_train_top, X_test_top, y_train, y_test)


Logistic Regression Results:
CV Scores: [0.92857143 0.92857143 1.         0.78571429 1.        ]
Mean CV Score: 0.929
Std CV Score: 0.078
Test Accuracy: 0.857
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.83      0.83         6
           1       0.88      0.88      0.88         8

    accuracy                           0.86        14
   macro avg       0.85      0.85      0.85        14
weighted avg       0.86      0.86      0.86        14




In [6]:

# 3. SVM Classifier (RBF Kernel)
def evaluate_svm_rbf(X_train, X_test, y_train, y_test):
    model = SVC(kernel='rbf', random_state=42)
    model.fit(X_train, y_train)
    cv_scores = cross_val_score(model, X_top, y, cv=5)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, zero_division=0)
    print("SVM (RBF) Results:")
    print(f"CV Scores: {cv_scores}")
    print(f"Mean CV Score: {cv_scores.mean():.3f}")
    print(f"Std CV Score: {cv_scores.std():.3f}")
    print(f"Test Accuracy: {accuracy:.3f}")
    print(f"Classification Report:\n{report}\n")




In [7]:

evaluate_svm_rbf(X_train_top, X_test_top, y_train, y_test)


SVM (RBF) Results:
CV Scores: [0.92857143 0.92857143 1.         0.78571429 1.        ]
Mean CV Score: 0.929
Std CV Score: 0.078
Test Accuracy: 0.857
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.83      0.83         6
           1       0.88      0.88      0.88         8

    accuracy                           0.86        14
   macro avg       0.85      0.85      0.85        14
weighted avg       0.86      0.86      0.86        14




In [8]:
# 4. K-Nearest Neighbors Classifier
def evaluate_knn(X_train, X_test, y_train, y_test):
    model = KNeighborsClassifier(n_neighbors=5)
    model.fit(X_train, y_train)
    cv_scores = cross_val_score(model, X_top, y, cv=5)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, zero_division=0)
    print("KNN Results:")
    print(f"CV Scores: {cv_scores}")
    print(f"Mean CV Score: {cv_scores.mean():.3f}")
    print(f"Std CV Score: {cv_scores.std():.3f}")
    print(f"Test Accuracy: {accuracy:.3f}")
    print(f"Classification Report:\n{report}\n")


In [9]:
evaluate_knn(X_train_top, X_test_top, y_train, y_test)


KNN Results:
CV Scores: [1.         0.92857143 1.         0.78571429 1.        ]
Mean CV Score: 0.943
Std CV Score: 0.083
Test Accuracy: 0.929
Classification Report:
              precision    recall  f1-score   support

           0       0.86      1.00      0.92         6
           1       1.00      0.88      0.93         8

    accuracy                           0.93        14
   macro avg       0.93      0.94      0.93        14
weighted avg       0.94      0.93      0.93        14




In [10]:

# 5. Decision Tree Classifier
def evaluate_decision_tree(X_train, X_test, y_train, y_test):
    model = DecisionTreeClassifier(random_state=42)
    model.fit(X_train, y_train)
    cv_scores = cross_val_score(model, X_top, y, cv=5)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, zero_division=0)
    print("Decision Tree Results:")
    print(f"CV Scores: {cv_scores}")
    print(f"Mean CV Score: {cv_scores.mean():.3f}")
    print(f"Std CV Score: {cv_scores.std():.3f}")
    print(f"Test Accuracy: {accuracy:.3f}")
    print(f"Classification Report:\n{report}\n")

In [11]:
evaluate_decision_tree(X_train_top, X_test_top, y_train, y_test)

Decision Tree Results:
CV Scores: [0.85714286 1.         1.         0.78571429 1.        ]
Mean CV Score: 0.929
Std CV Score: 0.090
Test Accuracy: 0.857
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.83      0.83         6
           1       0.88      0.88      0.88         8

    accuracy                           0.86        14
   macro avg       0.85      0.85      0.85        14
weighted avg       0.86      0.86      0.86        14




In [12]:
from sklearn.naive_bayes import GaussianNB  # Added for Naive Bayes
def evaluate_naive_bayes(X_train, X_test, y_train, y_test):
    model = GaussianNB()
    model.fit(X_train, y_train)
    cv_scores = cross_val_score(model, X_top, y, cv=5)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, zero_division=0)
    print("Naive Bayes (Gaussian) Results:")
    print(f"CV Scores: {cv_scores}")
    print(f"Mean CV Score: {cv_scores.mean():.3f}")
    print(f"Std CV Score: {cv_scores.std():.3f}")
    print(f"Test Accuracy: {accuracy:.3f}")
    print(f"Classification Report:\n{report}\n")

In [13]:
evaluate_naive_bayes(X_train_top, X_test_top, y_train_scaled, y_test_scaled)

Naive Bayes (Gaussian) Results:
CV Scores: [0.64285714 1.         0.92857143 0.85714286 0.92857143]
Mean CV Score: 0.871
Std CV Score: 0.123
Test Accuracy: 0.929
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.83      0.91         6
           1       0.89      1.00      0.94         8

    accuracy                           0.93        14
   macro avg       0.94      0.92      0.93        14
weighted avg       0.94      0.93      0.93        14


