In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
from collections import Counter

input_file_segments = '/content/drive/MyDrive/Project/ExtractedSegments_1s_set4.npy'
input_file_labels = '/content/drive/MyDrive/Project/ExtractedLabelas_1s_set4.npy'

loaded_segments = np.load(input_file_segments, allow_pickle=True)
loaded_labels = np.load(input_file_labels, allow_pickle=True)

print(f'Total segments loaded: {len(loaded_segments)}')
print(f'Total labels loaded: {len(loaded_labels)}')
print(f'Sample segment shape: {loaded_segments[0].shape}')
print(f'Sample label: {loaded_labels[0]}')

label_counts = Counter(loaded_labels)
print(f'Label counts: {label_counts}')

max_count = max(label_counts.values())

def add_gaussian_noise(data, mean=0, std_dev=0.05):
    noise = np.random.normal(mean, std_dev, data.shape)
    return data + noise

augmented_segments = []
augmented_labels = []

for segment, label in zip(loaded_segments, loaded_labels):
    if segment.shape == (125, 16):
        augmented_segments.append(segment)
        augmented_labels.append(label)
    else:
        print(f"Skipping segment with invalid shape: {segment.shape}")

for label, count in label_counts.items():
    if count < max_count:
        label_segments = [seg for seg, lbl in zip(loaded_segments, loaded_labels) if lbl == label]

        num_samples_needed = max_count - count

        for _ in range(num_samples_needed):
            segment = label_segments[np.random.randint(len(label_segments))]
            noisy_segment = add_gaussian_noise(segment)

            if noisy_segment.shape == (125, 16):
                augmented_segments.append(noisy_segment)
                augmented_labels.append(label)
            else:
                print(f"Generated segment with invalid shape: {noisy_segment.shape}")

augmented_segments = np.array(augmented_segments)
augmented_labels = np.array(augmented_labels)

print(f'Augmented data shape: {augmented_segments.shape}')
print(f'Augmented labels shape: {augmented_labels.shape}')
print(f'New label counts: {Counter(augmented_labels)}')

Total segments loaded: 937
Total labels loaded: 937
Sample segment shape: (125, 16)
Sample label: Walking
Label counts: Counter({'Walking': 290, 'Aha': 290, 'Doing Other Task': 290, 'Impasse': 42, 'Re-evaluation': 25})
Augmented data shape: (1450, 125, 16)
Augmented labels shape: (1450,)
New label counts: Counter({'Walking': 290, 'Aha': 290, 'Doing Other Task': 290, 'Impasse': 290, 'Re-evaluation': 290})


In [None]:
data = augmented_segments
final_labels = augmented_labels

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
import tensorflow as tf

In [None]:
def EEGNet_feature_extractor(Chans, Samples):
    input1 = layers.Input(shape=(Samples, Chans, 1))

    block1 = layers.Conv2D(16, (1, 64), padding='same')(input1)
    block1 = layers.BatchNormalization()(block1)
    block1 = layers.DepthwiseConv2D((Chans, 1), use_bias=False, depth_multiplier=2)(block1)
    block1 = layers.BatchNormalization()(block1)
    block1 = layers.Activation('elu')(block1)
    block1 = layers.AveragePooling2D((1, 4))(block1)
    block1 = layers.Dropout(0.25)(block1)

    block2 = layers.SeparableConv2D(16, (1, 16), padding='same')(block1)
    block2 = layers.BatchNormalization()(block2)
    block2 = layers.Activation('elu')(block2)
    block2 = layers.AveragePooling2D((1, 2))(block2)
    block2 = layers.Dropout(0.25)(block2)

    flatten = layers.Flatten()(block2)

    return models.Model(inputs=input1, outputs=flatten)

CNN_segments = np.array(data, dtype=np.float32)
feature_extractor = EEGNet_feature_extractor(Samples=125, Chans=16)

print(feature_extractor.summary())

features_CNN = feature_extractor.predict(CNN_segments)
print("Shape of extracted train features:", features_CNN.shape)

None
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
Shape of extracted train features: (1450, 3520)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import numpy as np
import pandas as pd

label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(final_labels)

concatenated_array_features = np.array(features_CNN , dtype=np.float32)

X_train, X_test, y_train, y_test = train_test_split(concatenated_array_features, encoded_labels, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

classifiers = {
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    "Gradient Boosting": GradientBoostingClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(),
    "Naive Bayes": GaussianNB()
}

results = {}

for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    results[name] = {
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1
    }

    print(f"Results for {name}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("")

results_df = pd.DataFrame(results).T
print("\nSummary of Classifier Performance:")
print(results_df)

Results for SVM:
Accuracy: 0.3172
Precision: 0.3242
Recall: 0.3172
F1 Score: 0.2419

Results for Random Forest:
Accuracy: 0.5483
Precision: 0.5511
Recall: 0.5483
F1 Score: 0.5438



Parameters: { "use_label_encoder" } are not used.



Results for XGBoost:
Accuracy: 0.5552
Precision: 0.5558
Recall: 0.5552
F1 Score: 0.5537

Results for Gradient Boosting:
Accuracy: 0.5621
Precision: 0.5652
Recall: 0.5621
F1 Score: 0.5597





Results for AdaBoost:
Accuracy: 0.3000
Precision: 0.3434
Recall: 0.3000
F1 Score: 0.3043

Results for K-Nearest Neighbors:
Accuracy: 0.4517
Precision: 0.3679
Recall: 0.4517
F1 Score: 0.3784

Results for Logistic Regression:
Accuracy: 0.4483
Precision: 0.4114
Recall: 0.4483
F1 Score: 0.4202

Results for Naive Bayes:
Accuracy: 0.1724
Precision: 0.4184
Recall: 0.1724
F1 Score: 0.1309


Summary of Classifier Performance:
                     Accuracy  Precision    Recall  F1 Score
SVM                  0.317241   0.324165  0.317241  0.241932
Random Forest        0.548276   0.551057  0.548276  0.543821
XGBoost              0.555172   0.555779  0.555172  0.553720
Gradient Boosting    0.562069   0.565155  0.562069  0.559727
AdaBoost             0.300000   0.343418  0.300000  0.304270
K-Nearest Neighbors  0.451724   0.367868  0.451724  0.378417
Logistic Regression  0.448276   0.411430  0.448276  0.420230
Naive Bayes          0.172414   0.418444  0.172414  0.130927


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
