In [1]:
import pandas as pd
from matplotlib import pyplot as plt
from pipeline.acquisition import load_sisfall_data
from pipeline.preprocessing import change_activity_duration
from pipeline.preprocessing import change_activity_sampling
from pipeline.feature_extraction import extract_features
import joblib
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, ConfusionMatrixDisplay
from sklearn import preprocessing
from sklearn.feature_selection import SelectFromModel

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
dataset_folder = "datasets/SisFall_dataset/"
models_output_folder = 'models/'
uma_dataset_folder = "datasets/output_uma"
combined_dataset_folder = "datasets/combined_dataset"

INCLUDE_GYROSCOPE = False

if INCLUDE_GYROSCOPE:
    sensors =  [0, 1, 2, 3, 4, 5]
else:
    sensors = [0, 1, 2]

should_load_sisfall_data = False

ignored_subjects = []
duration = 12000
frequency = 200

should_save_models = True

In [None]:
import os
import pandas as pd
import numpy as np

earpiece_classes = ['walking', 'jogging','stairs']

# read earpiece data from datasets/earpiece from *.csv files
earpiece_folder = "datasets/earpiece/"
earpiece_data = []

# Search for all CSV files in the earpiece folder
for filename in os.listdir(earpiece_folder):
    if filename.endswith('.csv'):
        file_path = os.path.join(earpiece_folder, filename)
        
        # Extract activity class from filename
        for activity in earpiece_classes:
            if activity in filename:
                class_name = activity
                break
        else:
            continue  # Skip files that don't match any activity class
        
        # Read data
        df = pd.read_csv(file_path)
        
        # Process accelerometer data
        if 'X' in df.columns and 'Y' in df.columns and 'Z' in df.columns:
            # Rename columns to match the format used in other datasets
            sensor_data = df[['X', 'Y', 'Z']].copy()
            sensor_data.columns = ['acc_x', 'acc_y', 'acc_z']
            
            # Create a record for this activity
            earpiece_data.append({
            'class': class_name,
            'data': sensor_data,
            })
        else:
            print(f"Warning: File {filename} does not contain X, Y, Z accelerometer columns")

# Convert to DataFrame
earpiece_raw_dataset = pd.DataFrame(earpiece_data)
feature_list = []
class_labels = []
for i in earpiece_raw_dataset.index:
    data = earpiece_raw_dataset['data'][i]
    features = extract_features(data, True)
    feature_list.append(features)
    class_labels.append(earpiece_raw_dataset['class'][i])
    prepared_earpiece_dataset = pd.concat(feature_list, ignore_index=True)


class_labels

['walking',
 'stairs',
 'walking',
 'walking',
 'stairs',
 'stairs',
 'walking',
 'walking',
 'stairs',
 'stairs',
 'walking',
 'walking',
 'stairs',
 'stairs',
 'jogging',
 'jogging',
 'jogging',
 'stairs']

In [27]:
earpiece_raw_dataset['data'][0]

Unnamed: 0,acc_x,acc_y,acc_z,mag_acc
0,-0.923340,0.348633,0.310547,1.034669
1,-0.913086,0.353516,0.312500,1.027792
2,-0.897461,0.354492,0.295410,1.009142
3,-0.897461,0.360352,0.291016,1.009940
4,-0.893066,0.363281,0.281738,1.004449
...,...,...,...,...
7780,-0.929199,0.449219,0.131836,1.040476
7781,-0.928223,0.438965,0.137695,1.035977
7782,-0.919922,0.430664,0.147461,1.026388
7783,-0.908203,0.407227,0.148926,1.006402


In [None]:
final_earpiece
# Grab 2 

Unnamed: 0,mean_acc_x,mean_acc_y,mean_acc_z,mean_mag_acc,var_acc_x,var_acc_y,var_acc_z,var_mag_acc,std_acc_x,std_acc_y,...,ptp_mag_acc,centile25_acc_x,centile25_acc_y,centile25_acc_z,centile25_mag_acc,centile75_acc_x,centile75_acc_y,centile75_acc_z,centile75_mag_acc,class
0,-0.814437,0.546556,-0.133941,1.012872,0.012891,0.018413,0.026289,0.011655,0.11354,0.135695,...,1.217528,-0.884277,0.452148,-0.250977,0.951735,-0.744629,0.639648,-0.023926,1.053523,walking
1,-0.981495,0.130228,0.081754,1.019077,0.116514,0.0347,0.026035,0.125707,0.341342,0.186278,...,2.738534,-1.203979,0.010742,-0.026367,0.755763,-0.727661,0.225586,0.197266,1.250041,stairs
2,-0.583608,0.596157,-0.490585,1.010749,0.070467,0.046796,0.043037,0.075362,0.265456,0.216323,...,2.114208,-0.762207,0.43457,-0.623047,0.759017,-0.40332,0.743164,-0.348633,1.208641,walking
3,-0.892698,0.126547,0.332406,1.020349,0.039711,0.074712,0.050111,0.046841,0.199277,0.273336,...,2.297987,-0.980469,-0.049316,0.189453,0.879827,-0.765137,0.306152,0.476074,1.119072,walking
4,-0.976068,0.09758,-0.052473,1.021467,0.114432,0.047016,0.035501,0.118538,0.338278,0.216832,...,2.693815,-1.182739,-0.039062,-0.165161,0.752464,-0.713379,0.235962,0.071289,1.226112,stairs
5,-0.912954,0.372957,-0.07406,1.015826,0.038684,0.031527,0.027877,0.044252,0.196684,0.177557,...,1.195542,-1.029175,0.257935,-0.181396,0.854349,-0.766602,0.476074,0.043335,1.121241,stairs
6,-0.632372,0.530408,-0.497541,1.0141,0.080119,0.052813,0.061828,0.095134,0.283052,0.229812,...,1.702119,-0.825806,0.355347,-0.64624,0.721878,-0.411133,0.689575,-0.345703,1.240125,walking
7,0.187115,0.391926,-0.874793,1.001327,0.030326,0.018197,0.008908,0.008656,0.174144,0.134896,...,1.268497,0.10498,0.328979,-0.904297,0.978043,0.284668,0.429199,-0.830933,1.011279,walking
8,-0.91379,0.125751,0.377383,1.021575,0.103376,0.03106,0.045554,0.129619,0.321522,0.17624,...,1.887539,-1.077637,0.011719,0.245117,0.747116,-0.654297,0.235229,0.476562,1.214726,stairs
9,-0.881041,0.096793,0.391017,1.021225,0.04811,0.049937,0.060186,0.053828,0.219339,0.223465,...,2.179278,-1.006348,-0.056641,0.219727,0.858749,-0.72998,0.257324,0.550293,1.144253,stairs


In [4]:
from sklearn.model_selection import train_test_split


X_train,X_test = train_test_split(final_earpiece, 
    test_size=0.2, 
    stratify=final_earpiece['class'], 
    random_state=42
)

y_train = X_train['class']
y_test = X_test['class']

final_X_train = X_train.drop(columns=['class'])
final_X_test = X_test.drop(columns=['class'])



NameError: name 'final_earpiece' is not defined

In [None]:
svm = SVC(kernel='rbf', C=10, gamma='scale', probability=True, random_state=42)
svm.fit(final_X_train, y_train)

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(final_X_train, y_train)

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(final_X_train, y_train)

dt = DecisionTreeClassifier(random_state=42)
dt.fit(final_X_train, y_train)


lr = LogisticRegression(max_iter=1000, random_state=42)
lr.fit(final_X_train, y_train)


if should_save_models:
    joblib.dump(svm, f"{models_output_folder}/svm_model_earpiece_adl.pkl")
    joblib.dump(rf, f"{models_output_folder}/rf_model_earpiece_adl.pkl")
    joblib.dump(knn, f"{models_output_folder}/knn_model_earpiece_adl.pkl")
    joblib.dump(dt, f"{models_output_folder}/dt_model_earpiece_adl.pkl")
    joblib.dump(lr, f"{models_output_folder}/lr_model_earpiece_adl.pkl")


In [None]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    print(f"Model: {model.__class__.__name__}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    cm = confusion_matrix(y_test, y_pred)
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}\n")
    
    # Plot confusion matrix
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_)
    disp.plot(cmap='viridis')
    plt.title(f"Confusion Matrix for {model.__class__.__name__}")
    plt.show()

# Evaluate all models
evaluate_model(svm, final_X_test, y_test)
evaluate_model(rf, final_X_test, y_test)
evaluate_model(knn, final_X_test, y_test)
evaluate_model(dt, final_X_test, y_test)
evaluate_model(lr, final_X_test, y_test)