# Fall Detection using SisFall Dataset
Daniela Dias, nMec 98039

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score, classification_report, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay, \
    roc_curve
import optuna
import optuna.visualization as vis
import joblib
import plotly

In [2]:
# optuna.logging.disable_default_handler()

In [3]:
# Load the preprocessed dataset from CSV files
X_train = pd.read_csv('X_train.csv')
X_test = pd.read_csv('X_test.csv')
y_train = pd.read_csv('y_train.csv')
y_test = pd.read_csv('y_test.csv')

In [4]:
# Check the shape of the loaded dataset
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((10960, 99), (10960, 1), (1870, 99), (1870, 1))

## Utility Functions

In [5]:
def optuna_visualizations(study, title, params):
    # Visualize the hyperparameter combinations and their performance
    fig = vis.plot_contour(study, params=params)
    fig.title(f"Hyperparameter Contour Plot for {title}")
    fig.show()

    # Visualize the optimization history
    fig = vis.plot_optimization_history(study)
    fig.title(f"Optimization History for {title}")
    fig.show()

    # Visualize the parameter importance
    fig = vis.plot_param_importances(study)
    fig.title(f"Parameter Importance for {title}")
    fig.show()

In [6]:
def confusion_matrix_plot(y_test, y_pred, title):
    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_test, y_pred)

    # Plot the confusion matrix
    disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=["Fall", "No Fall"])
    disp.plot(cmap='Blues')
    disp.ax_.set_title(title)
    disp.ax_.set_xlabel("Predicted")
    disp.ax_.set_ylabel("True")
    disp.figure_.set_size_inches(8, 6)
    disp.figure_.tight_layout()
    plt.show()

In [7]:
def roc_curve_plot(y_test, y_proba, title):
    # Calculate ROC curve
    fpr, tpr, thresholds = roc_curve(y_test, y_proba[:, 1], pos_label=1)
    roc_auc = roc_auc_score(y_test, y_proba[:, 1])

    # Plot ROC curve
    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, label='ROC Curve (area = {:.2f})'.format(roc_auc), color='blue')
    plt.plot([0, 1], [0, 1], 'r--', label='Random Guessing')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.legend(loc='lower right')
    plt.grid()
    plt.show()

In [8]:
def run_model(model, X_train, y_train, X_test, y_test):
    # Fit the model on the training data
    model.fit(X_train, y_train.values.ravel())

    # Make predictions on the test data
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)

    # Calculate the F1 score
    f1 = f1_score(y_test, y_pred, average='macro')
    print("F1 score: ", f1)

    # Print the classification report
    print(classification_report(y_test, y_pred))

    # Calculate ROC-AUC score
    roc_auc = roc_auc_score(y_test, y_proba)
    print(f"ROC-AUC Score: {roc_auc:.4f}")

    return y_pred, y_proba

## Support Vector Machine (SVM)

In [9]:
def svm_objective(trial):
    # Define the hyperparameter search space
    C = trial.suggest_float('C', 1e-5, 1e5, log=True)
    gamma = trial.suggest_float('gamma', 1e-5, 1e5, log=True)
    kernel = trial.suggest_categorical('kernel', ['linear', 'rbf'])

    # Create the SVM model with the suggested hyperparameters
    model = SVC(C=C, gamma=gamma, kernel=kernel)

    # Fit the model on the training data
    model.fit(X_train, y_train.values.ravel())

    # Make predictions on the test data
    y_pred = model.predict(X_test)

    # Calculate the F1 score
    return f1_score(y_test, y_pred, average='macro')

In [None]:
# Create a study object and optimize the objective function
svm_study = optuna.create_study(direction='maximize')
svm_study.optimize(svm_objective, n_trials=10)

[I 2025-05-02 23:01:16,122] A new study created in memory with name: no-name-690881fa-4c59-498b-96ce-a993f2fc5896
[I 2025-05-02 23:01:25,674] Trial 0 finished with value: 0.737865478221553 and parameters: {'C': 0.0004544568418909225, 'gamma': 0.1356562834477823, 'kernel': 'linear'}. Best is trial 0 with value: 0.737865478221553.
[I 2025-05-02 23:01:36,492] Trial 1 finished with value: 0.7105858835288239 and parameters: {'C': 0.10889621050501146, 'gamma': 7662.697424308969, 'kernel': 'linear'}. Best is trial 0 with value: 0.737865478221553.
[I 2025-05-02 23:01:43,590] Trial 2 finished with value: 0.7214611019233873 and parameters: {'C': 0.003609710241951188, 'gamma': 0.000972948243454224, 'kernel': 'linear'}. Best is trial 0 with value: 0.737865478221553.
[I 2025-05-02 23:02:07,168] Trial 3 finished with value: 0.7014731017408393 and parameters: {'C': 0.000680318886522783, 'gamma': 6.874648850197391e-05, 'kernel': 'rbf'}. Best is trial 0 with value: 0.737865478221553.
[I 2025-05-02 23:0

In [None]:
# Print the best hyperparameters and the corresponding F1 score
print("Best hyperparameters: ", svm_study.best_params)
print("Best F1 score: ", svm_study.best_value)

In [None]:
# Visualize optuna results
optuna_visualizations(svm_study, "SVM", ["C", "gamma", "kernel"])

In [ ]:
# Run the SVM model with the best hyperparameters
best_svm_model = SVC(**svm_study.best_params)
y_pred, y_proba = run_model(best_svm_model, X_train, y_train, X_test, y_test)

In [ ]:
# Plot confusion matrix
confusion_matrix_plot(y_test, y_pred, "Confusion Matrix for SVM")

In [ ]:
# Plot ROC curve
roc_curve_plot(y_test, y_proba, "ROC Curve for SVM")

In [ ]:
# Save the model
joblib.dump(best_svm_model, 'best_svm_model.pkl')

## Random Forest

In [ ]:
def rf_objective(trial):
    # Define the hyperparameter search space
    n_estimators = trial.suggest_int('n_estimators', 10, 200)
    max_depth = trial.suggest_int('max_depth', 1, 50)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 20)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 20)

    # Create the Random Forest model with the suggested hyperparameters
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth,
                                   min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf)

    # Fit the model on the training data
    model.fit(X_train, y_train.values.ravel())

    # Make predictions on the test data
    y_pred = model.predict(X_test)

    # Calculate the F1 score
    return f1_score(y_test, y_pred, average='macro')

In [ ]:
# Create a study object and optimize the objective function
rf_study = optuna.create_study(direction='maximize')
rf_study.optimize(rf_objective, n_trials=10)

In [ ]:
# Print the best hyperparameters and the corresponding F1 score
print("Best hyperparameters: ", rf_study.best_params)
print("Best F1 score: ", rf_study.best_value)

In [ ]:
# Visualize optuna results
optuna_visualizations(rf_study, "Random Forest", ["n_estimators", "max_depth", "min_samples_split", "min_samples_leaf"])

In [ ]:
# Run the Random Forest model with the best hyperparameters
best_rf_model = RandomForestClassifier(**rf_study.best_params)
y_pred, y_proba = run_model(best_rf_model, X_train, y_train, X_test, y_test)

In [ ]:
# Plot confusion matrix
confusion_matrix_plot(y_test, y_pred, "Confusion Matrix for Random Forest")

In [ ]:
# Plot ROC curve
roc_curve_plot(y_test, y_proba, "ROC Curve for Random Forest")

In [ ]:
# Save the model
joblib.dump(best_rf_model, 'best_rf_model.pkl')

## K-Nearest Neighbors (KNN)

In [ ]:
def knn_objective(trial):
    # Define the hyperparameter search space
    n_neighbors = trial.suggest_int('n_neighbors', 1, 50)
    weights = trial.suggest_categorical('weights', ['uniform', 'distance'])
    algorithm = trial.suggest_categorical('algorithm', ['auto', 'ball_tree', 'kd_tree', 'brute'])

    # Create the KNN model with the suggested hyperparameters
    model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm)

    # Fit the model on the training data
    model.fit(X_train, y_train.values.ravel())

    # Make predictions on the test data
    y_pred = model.predict(X_test)

    # Calculate the F1 score
    return f1_score(y_test, y_pred, average='macro')

In [ ]:
# Create a study object and optimize the objective function
knn_study = optuna.create_study(direction='maximize')
knn_study.optimize(knn_objective, n_trials=10)

In [ ]:
# Print the best hyperparameters and the corresponding F1 score
print("Best hyperparameters: ", knn_study.best_params)
print("Best F1 score: ", knn_study.best_value)

In [ ]:
# Visualize optuna results
optuna_visualizations(knn_study, "KNN", ["n_neighbors", "weights", "algorithm"])

In [ ]:
# Run the KNN model with the best hyperparameters
best_knn_model = KNeighborsClassifier(**knn_study.best_params)
y_pred, y_proba = run_model(best_knn_model, X_train, y_train, X_test, y_test)

In [ ]:
# Plot confusion matrix
confusion_matrix_plot(y_test, y_pred, "Confusion Matrix for KNN")

In [ ]:
# Plot ROC curve
roc_curve_plot(y_test, y_proba, "ROC Curve for KNN")

In [ ]:
# Save the model
joblib.dump(best_knn_model, 'best_knn_model.pkl')