In [2]:
!pip3 install -U scikit-learn 

Collecting scikit-learn
  Using cached scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Using cached scipy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Using cached scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.3 MB)
Using cached joblib-1.4.2-py3-none-any.whl (301 kB)
Using cached scipy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (41.2 MB)
Using cached threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, scipy, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.5.2 scipy-1.14.1 threadpoolctl-3.5.0
[0m

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import numpy as np
import joblib
import os

In [10]:
csv_files = ["combined_eeg.csv", "combined_ecg.csv", "combined_emg.csv", "combined_eog.csv"]
combined_data = {}

In [11]:
for csv_file in csv_files:
    if os.path.exists(csv_file):
        df = pd.read_csv(csv_file)
        signal_type = csv_file.split('_')[1].split('.')[0]  # Extracting signal type (eeg, ecg, emg, eog)
        combined_data[signal_type] = df
    else:
        print(f"Warning: CSV file {csv_file} not found.")

In [12]:
model_list = {
    'SVM': SVC(probability=True),
    'Random Forest': RandomForestClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'K-Nearest Neighbors': KNeighborsClassifier()
}


In [14]:
best_models = {}
X_train_dict, X_val_dict, X_test_dict, y_train_dict, y_val_dict, y_test_dict = {}, {}, {}, {}, {}, {}

for signal_type, df in combined_data.items():
    features = df.drop(columns=['Label'])
    labels = df['Label']

    # Split the data for each signal type (70% train, 20% validation, 10% test)
    X_train, X_temp, y_train, y_temp = train_test_split(features, labels, test_size=0.3, random_state=42, stratify=labels)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.3333, random_state=42, stratify=y_temp)  # 0.3333 * 0.3 = 0.1

    X_train_dict[signal_type] = X_train
    X_val_dict[signal_type] = X_val
    X_test_dict[signal_type] = X_test
    y_train_dict[signal_type] = y_train
    y_val_dict[signal_type] = y_val
    y_test_dict[signal_type] = y_test

    best_model = None
    best_f1 = 0

In [15]:
for model_name, model in model_list.items():
        if len(np.unique(y_train)) < 2:
            print(f"Skipping model training for {signal_type} due to only one class in training set.")
            continue
        
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)
        f1 = f1_score(y_val, y_pred, zero_division=1)

        print(f"{signal_type} - {model_name} F1-Score: {f1}")

        # Select the best model based on F1-score
        if f1 > best_f1:
            best_model = model
            best_f1 = f1

eog - SVM F1-Score: 0.5316831683168317
eog - Random Forest F1-Score: 0.738498789346247
eog - Gradient Boosting F1-Score: 0.6517626827171109
eog - Logistic Regression F1-Score: 0.5766871165644172
eog - K-Nearest Neighbors F1-Score: 0.6460788297440065


In [16]:
if best_model is not None:
        best_models[signal_type] = best_model
        print(f"Best model for {signal_type}: {type(best_model).__name__} with F1-Score: {best_f1}")
        # Save the best model for each signal type
        joblib.dump(best_model, f"best_model_{signal_type}.joblib")

Best model for eog: RandomForestClassifier with F1-Score: 0.738498789346247


In [17]:
def majority_voting(predictions):
    return np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=0, arr=predictions)

In [18]:
all_test_preds = []
for signal_type in combined_data.keys():
    if signal_type in X_test_dict and signal_type in best_models:
        X_test = X_test_dict[signal_type]
        model = best_models[signal_type]
        preds = model.predict(X_test)
        all_test_preds.append(preds)


In [19]:
if all_test_preds:
    final_predictions = majority_voting(np.array(all_test_preds))

    # Evaluation
    y_test_combined = pd.concat([y_test_dict[signal_type] for signal_type in combined_data.keys() if signal_type in y_test_dict and signal_type in best_models]).reset_index(drop=True)
    fused_accuracy = accuracy_score(y_test_combined, final_predictions)
    fused_precision = precision_score(y_test_combined, final_predictions, zero_division=1)
    fused_recall = recall_score(y_test_combined, final_predictions, zero_division=1)
    fused_f1 = f1_score(y_test_combined, final_predictions, zero_division=1)

    print(f"Fused Model - Accuracy: {fused_accuracy}, Precision: {fused_precision}, Recall: {fused_recall}, F1-Score: {fused_f1}")


Fused Model - Accuracy: 0.7494089834515366, Precision: 0.7520259319286872, Recall: 0.7376788553259142, F1-Score: 0.7447833065810594


In [20]:
 np.save("fused_model_predictions.npy", final_predictions)