# Load the EEG Signals:

In [1]:
import scipy.io

# Load the two MATLAB files.
mat = scipy.io.loadmat('eegdata.mat')
mat2 = scipy.io.loadmat('eeg_shirt_sweat.mat')

# Both files are read through the same 'eeg_Data' key.
eeg_signals = mat['eeg_Data']
eeg_signals2 = mat2['eeg_Data']

# Row 0 of each cell array holds one signal per column; collect them into lists.
signals = [eeg_signals[0, i] for i in range(eeg_signals.shape[1])]
# Fixed: the original indexed `eeg_signals` here, so `signals_ss` contained the
# first entries of the other file instead of the contents of 'eeg_shirt_sweat.mat'.
signals_ss = [eeg_signals2[0, i] for i in range(eeg_signals2.shape[1])]

In [2]:
import numpy as np

# Channel positions kept for the analysis (the author labels them "frontal").
frontal_indices = [0, 1, 2, 3, 10, 11, 12, 13]

# Stack the per-signal arrays into one 3-D array, swap the last two axes, and
# keep only the selected channels along axis 1.
eeg = np.transpose(np.stack(signals), (0, 2, 1))[:, frontal_indices, :]
eeg_ss = np.transpose(np.stack(signals_ss), (0, 2, 1))[:, frontal_indices, :]

# Show the resulting shapes of both arrays.
print(eeg.shape)
print(eeg_ss.shape)

(895, 8, 512)
(150, 8, 512)


In [3]:
import numpy as np
from scipy.signal import butter, filtfilt

# Band-pass filter design
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter.

    Args:
        lowcut: Lower cutoff frequency, in the same units as ``fs``.
        highcut: Upper cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency.
        order: Order passed to ``scipy.signal.butter`` (default 5).

    Returns:
        The ``(b, a)`` coefficient pair returned by ``scipy.signal.butter``.
    """
    # Cutoffs are normalised by the Nyquist frequency (half the sampling rate).
    half_rate = 0.5 * fs
    band = [lowcut / half_rate, highcut / half_rate]
    return butter(order, band, btype='band')

def bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass filter ``data`` along its first axis.

    Args:
        data: Array to filter; filtering is applied along axis 0.
        lowcut: Lower cutoff frequency, in the same units as ``fs``.
        highcut: Upper cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency.
        order: Filter order forwarded to ``butter_bandpass`` (default 5).

    Returns:
        The output of ``scipy.signal.filtfilt`` for the designed filter.
    """
    coeffs = butter_bandpass(lowcut, highcut, fs, order=order)
    # axis=0 is the sample axis when `data` is a single 1-D channel.
    return filtfilt(*coeffs, data, axis=0)

# Filter settings: low and high cutoff frequencies, and the sampling rate in Hz.
lowcut, highcut = 1.0, 40.0
fs = 128

In [4]:
# Design the band-pass filter once and reuse it for every signal and channel.
# The original allocated `filtered_data` twice and re-derived identical
# coefficients on every iteration of the nested loops.
bp_b, bp_a = butter_bandpass(lowcut, highcut, fs)

# filtfilt filters each 1-D slice along the chosen axis independently, so
# filtering the last (sample) axis of the whole array is equivalent to the
# per-signal / per-channel loops over `eeg[i, j, :]`. The result keeps
# filtfilt's floating-point output instead of being cast into `eeg`'s dtype
# through a `zeros_like` buffer.
filtered_data = filtfilt(bp_b, bp_a, eeg, axis=-1)
filtered_data2 = filtfilt(bp_b, bp_a, eeg_ss, axis=-1)

# These globals are read later when the networks are built. As in the original
# cell, their final values come from the second data set.
num_signals, num_channels, num_samples = eeg_ss.shape

# Show the shape of both filtered arrays.
print(filtered_data.shape)
print(filtered_data2.shape)

(895, 8, 512)
(150, 8, 512)


In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.model_selection import train_test_split

# Label files are read without a header row; the labels are taken from the
# first column of each file.
# NOTE(review): both paths are absolute and machine-specific.
dftrainSS = pd.read_csv(
    "/home/andres_marin/Notebooks/Tesis/labels/Labels_Shirt_Sweater.csv", header=None
)
df = pd.read_csv(
    "/home/andres_marin/Notebooks/Tesis/labels/Labels_All_Data.csv", header=None
)

labelsSS = dftrainSS.iloc[:, 0].values
labels = df.iloc[:, 0].values

# Sanity output: label count, the first ten labels, and the class distribution.
print("Labels shape:", labels.shape)
print("Firts values:", labels[:10])

label_distribution = Counter(labels)
print("Labels dsitribution:", label_distribution)

Labels shape: (895,)
Firts values: [1 0 1 0 1 0 0 1 0 1]
Labels dsitribution: Counter({0: 502, 1: 393})


In [8]:
# Stratified 50/50 split of the second data set and its labels into two halves.
x_div1, x_div2, y_div1, y_div2 = train_test_split(
    filtered_data2,
    labelsSS,
    test_size=0.50,
    stratify=labelsSS,
    random_state=42,
)

In [9]:
# Stratified 75/25 split of the main data set and its labels.
x_train, x_test, y_train, y_test = train_test_split(
    filtered_data,
    labels,
    test_size=0.25,
    stratify=labels,
    random_state=42,
)

# Class counts in each part, to check that stratification kept the proportions.
train_counts, val_counts = Counter(y_train), Counter(y_test)

print("\nDistribución de clases en el conjunto de entrenamiento:", train_counts)
print("Distribución de clases en el conjunto de validación:", val_counts)


Distribución de clases en el conjunto de entrenamiento: Counter({0: 376, 1: 295})
Distribución de clases en el conjunto de validación: Counter({0: 126, 1: 98})


In [11]:
# `to_categorical` is not imported by any visible cell, so this cell raised a
# NameError on a fresh kernel. Import it from the same Keras utilities module
# that the training cells reach through `tf.keras.utils`.
from tensorflow.keras.utils import to_categorical

# One-hot encode the 0/1 label vectors into two columns.
y_train_cat = to_categorical(y_train, num_classes=2)
# NOTE(review): `y_test1` is not defined in any visible cell (the execution
# counter skips from 9 to 11) — confirm where it is created before running.
y_test1_cat = to_categorical(y_test1, num_classes=2)
y_test_cat = to_categorical(y_test, num_classes=2)
y_div2_cat = to_categorical(y_div2, num_classes=2)

# EEGNet:

In [None]:
import optuna
import tensorflow as tf
from EEGModels import EEGNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np

# Fresh, empty ranking containers for this model family. `top_models` is filled
# by `objective`; `top_accuracies` is initialised but not used in the visible cells.
top_models, top_accuracies = [], []

def compute_metrics(y_true, y_pred):
    """Return ``(accuracy, specificity, sensitivity)`` for binary 0/1 labels.

    Class 1 is the positive class and class 0 the negative class.

    Changes from the original implementation:
      * The four confusion counts are computed with NumPy, so the function no
        longer relies on ``confusion_matrix``, which is only imported by a
        later cell.
      * When only one class occurs, the original confusion matrix was 1x1 and
        could not be unpacked into four values; the missing counts are now 0.

    Args:
        y_true: Sequence or array of true labels (0 or 1).
        y_pred: Sequence or array of predicted labels (0 or 1), same length.

    Returns:
        Tuple ``(accuracy, specificity, sensitivity)``; a ratio whose
        denominator is zero is reported as 0.

    Raises:
        ValueError: If the inputs differ in length or contain values other
            than 0 and 1.
    """
    truth = np.asarray(y_true).ravel()
    guess = np.asarray(y_pred).ravel()
    if truth.shape != guess.shape:
        raise ValueError("y_true and y_pred must have the same number of elements")
    if not (np.isin(truth, (0, 1)).all() and np.isin(guess, (0, 1)).all()):
        raise ValueError("compute_metrics expects binary labels encoded as 0 and 1")

    true_pos = truth == 1
    pred_pos = guess == 1

    # Confusion counts, laid out as [TN, FP], [FN, TP].
    tn = int(np.sum(~true_pos & ~pred_pos))
    fp = int(np.sum(~true_pos & pred_pos))
    fn = int(np.sum(true_pos & ~pred_pos))
    tp = int(np.sum(true_pos & pred_pos))

    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    return accuracy, specificity, sensitivity

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated accuracy for a sampled seed.

    The only sampled parameter is the integer ``seed``; it seeds NumPy,
    TensorFlow and the shuffling of the stratified folds. For each fold an
    EEGNet is built, trained with early stopping, and scored with
    ``compute_metrics``.

    Reads the globals ``x_data``, ``y_data``, ``num_channels`` and
    ``num_samples``. Appends ``(model, avg_accuracy, avg_specificity,
    avg_sensitivity)`` to the global ``top_models`` list, which is kept sorted
    by mean accuracy (descending) and trimmed to five entries. The stored
    ``model`` is the one trained on the last fold.

    Args:
        trial: Trial object supplied by Optuna.

    Returns:
        Mean accuracy over the five folds.
    """
    # `StratifiedKFold` is used below but not imported by any visible cell;
    # importing it here keeps the function runnable on a fresh kernel.
    from sklearn.model_selection import StratifiedKFold

    # Sampled parameter.
    seed = trial.suggest_int('seed', 0, 1000000)

    # Seed the random number generators.
    np.random.seed(seed)
    tf.random.set_seed(seed)

    # Cross-validation splitter.
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

    accuracies = []
    specificities = []
    sensitivities = []

    # NOTE(review): `x_data` and `y_data` are not defined in any visible cell —
    # confirm which arrays they are meant to hold before running.
    for train_idx, test_idx in skf.split(x_data, y_data):
        x_fold_train, x_fold_test = x_data[train_idx], x_data[test_idx]
        y_fold_train, y_fold_test = y_data[train_idx], y_data[test_idx]

        # One-hot targets for the categorical cross-entropy loss.
        y_fold_train_cat = tf.keras.utils.to_categorical(y_fold_train, num_classes=2)
        y_fold_test_cat = tf.keras.utils.to_categorical(y_fold_test, num_classes=2)

        # Build and compile the model.
        model = EEGNet(nb_classes=2, Chans=num_channels, Samples=num_samples)
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        # NOTE(review): the held-out fold is also the early-stopping validation
        # set, so the fold scores below are measured on the data that selected
        # the restored weights — consider a separate validation split.
        early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)

        # Train the model.
        model.fit(x_fold_train, y_fold_train_cat, epochs=300, batch_size=32, verbose=0,
                  validation_data=(x_fold_test, y_fold_test_cat), callbacks=[early_stopping])

        # Score the held-out fold using the predicted class index.
        y_pred = np.argmax(model.predict(x_fold_test), axis=1)
        accuracy, specificity, sensitivity = compute_metrics(y_fold_test, y_pred)

        accuracies.append(accuracy)
        specificities.append(specificity)
        sensitivities.append(sensitivity)

    # Mean and standard deviation of each metric across folds.
    avg_accuracy = np.mean(accuracies)
    avg_specificity = np.mean(specificities)
    avg_sensitivity = np.mean(sensitivities)

    std_accuracy = np.std(accuracies)
    std_specificity = np.std(specificities)
    std_sensitivity = np.std(sensitivities)

    # Keep the five entries with the highest mean accuracy. `model` is the
    # network from the last fold of this trial.
    top_models.append((model, avg_accuracy, avg_specificity, avg_sensitivity))
    top_models.sort(key=lambda x: x[1], reverse=True)

    if len(top_models) > 5:
        top_models.pop()

    # Report the metrics of this trial.
    print(f"Seed: {seed} | Accuracy: {avg_accuracy:.3f} ± {std_accuracy:.3f} | "
          f"Specificity: {avg_specificity:.3f} ± {std_specificity:.3f} | "
          f"Sensitivity: {avg_sensitivity:.3f} ± {std_sensitivity:.3f}")

    return avg_accuracy

# Run 20 Optuna trials, maximising the value returned by `objective`.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

# Report the seed used by the best-scoring trial.
best_trial = study.best_trial
best_seed = best_trial.params['seed']
print(f"La mejor semilla es: {best_seed}")

**Tests:**

In [None]:
# Take the model from the first (highest mean accuracy) entry of `top_models`;
# each entry is (model, avg_accuracy, avg_specificity, avg_sensitivity).
best_model = top_models[0][0]

# Model outputs (one column per class) for the training split, the test split
# and the second half of the shirt & sweater data.
# NOTE(review): `x_test1` is not created in any visible cell — confirm its origin.
y_pred = best_model.predict(x_train)
y_pred2 = best_model.predict(x_test1)
y_pred3 = best_model.predict(x_div2)

# Accuracy: compare class indices obtained by argmax over the class axis.
accuracy1 = accuracy_score(np.argmax(y_train_cat, axis=1), np.argmax(y_pred, axis=1))
accuracy2 = accuracy_score(np.argmax(y_test1_cat, axis=1), np.argmax(y_pred2, axis=1))
accuracy3 = accuracy_score(np.argmax(y_div2_cat, axis=1), np.argmax(y_pred3, axis=1))

print("Precisión del modelo en el conjunto de train: {:.2f}%".format(accuracy1 * 100))
print("Precisión del modelo en el conjunto de prueba: {:.2f}%".format(accuracy2 * 100))
# Fixed: this line reused the "conjunto de prueba" label, so the shirt & sweater
# result could not be told apart from the test-set result in the output.
print("Precisión del modelo en el conjunto de prueba de camisas y sweaters: {:.2f}%".format(accuracy3 * 100))

In [None]:
# Write the selected model to disk. The .h5 extension indicates the HDF5 format.
model_path = (
    '/home/andres_marin/Notebooks/Tesis/Deep_Learning_Test/Models/Frontal_ch_EEGNet_75_70_2.h5'
)
best_model.save(model_path)
print(f"Modelo 1 guardado en {model_path}")

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# `y_pred2` holds one score per class for each trial (it is argmax-ed over
# axis 1 when accuracy is computed). Thresholding it at 0.5 produced an (n, 2)
# indicator array that `confusion_matrix` cannot compare with the 1-D labels in
# `y_test1`. Use the winning class index instead, stored under a new name so
# `y_pred2` is not overwritten and the cell can be re-run safely.
y_pred2_labels = np.argmax(y_pred2, axis=1)

# Confusion matrix of true labels versus predicted class indices.
conf_matrix = confusion_matrix(y_test1, y_pred2_labels)

# Draw the confusion matrix as an annotated heat map.
plt.figure(figsize=(10,7))
sns.heatmap(conf_matrix, annot=True, fmt='g', cmap='Blues')
plt.title('Matriz de Confusión')
plt.xlabel('Predicciones')
plt.ylabel('Valores Verdaderos')
plt.show()

In [None]:
# Display the summary of the currently selected model.
best_model.summary()

In [15]:
# Confusion-matrix counts entered by hand.
TP, FN = 90, 40
FP, TN = 30, 65

n_total = TP + TN + FP + FN

# Metrics expressed as percentages.
accuracy_percentage = ((TP + TN) / n_total) * 100
precision_percentage = (TP / (TP + FP)) * 100
specificity_percentage = (TN / (TN + FP)) * 100
recall_percentage = (TP / (TP + FN)) * 100

# F1 is the harmonic mean of precision and recall; balanced accuracy is the
# mean of recall and specificity. Both are computed but not printed.
f1_score_percentage = 2 * (precision_percentage * recall_percentage) / (precision_percentage + recall_percentage)
balanced_accuracy = (recall_percentage + specificity_percentage)/2

print("Accuracy percentage:", accuracy_percentage)
print("specificity_percentage:", specificity_percentage)
print("recall_percentage:", recall_percentage)

Accuracy percentage: 68.88888888888889
specificity_percentage: 68.42105263157895
recall_percentage: 69.23076923076923


# DeepConvNet

In [None]:
import optuna
import tensorflow as tf
from EEGModels import EEGNet, DeepConvNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np

# Fresh, empty ranking containers for this model family. `top_models` is filled
# by `objective`; `top_accuracies` is initialised but not used in the visible cells.
top_models, top_accuracies = [], []

def compute_metrics(y_true, y_pred):
    """Return ``(accuracy, specificity, sensitivity)`` for binary 0/1 labels.

    Class 1 is the positive class and class 0 the negative class.

    Changes from the original implementation:
      * The four confusion counts are computed with NumPy, so the function no
        longer relies on ``confusion_matrix`` being imported by another cell.
      * When only one class occurs, the original confusion matrix was 1x1 and
        could not be unpacked into four values; the missing counts are now 0.

    Args:
        y_true: Sequence or array of true labels (0 or 1).
        y_pred: Sequence or array of predicted labels (0 or 1), same length.

    Returns:
        Tuple ``(accuracy, specificity, sensitivity)``; a ratio whose
        denominator is zero is reported as 0.

    Raises:
        ValueError: If the inputs differ in length or contain values other
            than 0 and 1.
    """
    truth = np.asarray(y_true).ravel()
    guess = np.asarray(y_pred).ravel()
    if truth.shape != guess.shape:
        raise ValueError("y_true and y_pred must have the same number of elements")
    if not (np.isin(truth, (0, 1)).all() and np.isin(guess, (0, 1)).all()):
        raise ValueError("compute_metrics expects binary labels encoded as 0 and 1")

    true_pos = truth == 1
    pred_pos = guess == 1

    # Confusion counts, laid out as [TN, FP], [FN, TP].
    tn = int(np.sum(~true_pos & ~pred_pos))
    fp = int(np.sum(~true_pos & pred_pos))
    fn = int(np.sum(true_pos & ~pred_pos))
    tp = int(np.sum(true_pos & pred_pos))

    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    return accuracy, specificity, sensitivity

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated accuracy for a sampled seed.

    The only sampled parameter is the integer ``seed``; it seeds NumPy,
    TensorFlow and the shuffling of the stratified folds. For each fold a
    DeepConvNet is built, trained with early stopping, and scored with
    ``compute_metrics``.

    Reads the globals ``x_data``, ``y_data``, ``num_channels`` and
    ``num_samples``. Appends ``(model, avg_accuracy, avg_specificity,
    avg_sensitivity)`` to the global ``top_models`` list, which is kept sorted
    by mean accuracy (descending) and trimmed to five entries. The stored
    ``model`` is the one trained on the last fold.

    Args:
        trial: Trial object supplied by Optuna.

    Returns:
        Mean accuracy over the five folds.
    """
    # `StratifiedKFold` is used below but not imported by any visible cell;
    # importing it here keeps the function runnable on a fresh kernel.
    from sklearn.model_selection import StratifiedKFold

    # Sampled parameter.
    seed = trial.suggest_int('seed', 0, 1000000)

    # Seed the random number generators.
    np.random.seed(seed)
    tf.random.set_seed(seed)

    # Cross-validation splitter.
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

    accuracies = []
    specificities = []
    sensitivities = []

    # NOTE(review): `x_data` and `y_data` are not defined in any visible cell —
    # confirm which arrays they are meant to hold before running.
    for train_idx, test_idx in skf.split(x_data, y_data):
        x_fold_train, x_fold_test = x_data[train_idx], x_data[test_idx]
        y_fold_train, y_fold_test = y_data[train_idx], y_data[test_idx]

        # One-hot targets for the categorical cross-entropy loss.
        y_fold_train_cat = tf.keras.utils.to_categorical(y_fold_train, num_classes=2)
        y_fold_test_cat = tf.keras.utils.to_categorical(y_fold_test, num_classes=2)

        # Build and compile the model.
        model = DeepConvNet(nb_classes=2, Chans=num_channels, Samples=num_samples)
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        # NOTE(review): the held-out fold is also the early-stopping validation
        # set, so the fold scores below are measured on the data that selected
        # the restored weights — consider a separate validation split.
        early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)

        # Train the model.
        model.fit(x_fold_train, y_fold_train_cat, epochs=300, batch_size=32, verbose=0,
                  validation_data=(x_fold_test, y_fold_test_cat), callbacks=[early_stopping])

        # Score the held-out fold using the predicted class index.
        y_pred = np.argmax(model.predict(x_fold_test), axis=1)
        accuracy, specificity, sensitivity = compute_metrics(y_fold_test, y_pred)

        accuracies.append(accuracy)
        specificities.append(specificity)
        sensitivities.append(sensitivity)

    # Mean and standard deviation of each metric across folds.
    avg_accuracy = np.mean(accuracies)
    avg_specificity = np.mean(specificities)
    avg_sensitivity = np.mean(sensitivities)

    std_accuracy = np.std(accuracies)
    std_specificity = np.std(specificities)
    std_sensitivity = np.std(sensitivities)

    # Keep the five entries with the highest mean accuracy. `model` is the
    # network from the last fold of this trial.
    top_models.append((model, avg_accuracy, avg_specificity, avg_sensitivity))
    top_models.sort(key=lambda x: x[1], reverse=True)

    if len(top_models) > 5:
        top_models.pop()

    # Report the metrics of this trial.
    print(f"Seed: {seed} | Accuracy: {avg_accuracy:.3f} ± {std_accuracy:.3f} | "
          f"Specificity: {avg_specificity:.3f} ± {std_specificity:.3f} | "
          f"Sensitivity: {avg_sensitivity:.3f} ± {std_sensitivity:.3f}")

    return avg_accuracy

# Run 20 Optuna trials, maximising the value returned by `objective`.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

# Report the seed used by the best-scoring trial.
best_trial = study.best_trial
best_seed = best_trial.params['seed']
print(f"La mejor semilla es: {best_seed}")

In [None]:
# Take the model from the entry at index 3 of `top_models`; each entry is
# (model, avg_accuracy, avg_specificity, avg_sensitivity).
# NOTE(review): `objective` keeps the list sorted by mean accuracy in descending
# order, so index 3 is the fourth-ranked entry — confirm this choice is intended.
best_model = top_models[3][0]

# Model outputs (one column per class) for the training split, the test split
# and the second half of the shirt & sweater data.
# NOTE(review): `x_test1` is not created in any visible cell — confirm its origin.
y_pred = best_model.predict(x_train)
y_pred2 = best_model.predict(x_test1)
y_pred3 = best_model.predict(x_div2)

# Accuracy: compare class indices obtained by argmax over the class axis.
accuracy1 = accuracy_score(np.argmax(y_train_cat, axis=1), np.argmax(y_pred, axis=1))
accuracy2 = accuracy_score(np.argmax(y_test1_cat, axis=1), np.argmax(y_pred2, axis=1))
accuracy3 = accuracy_score(np.argmax(y_div2_cat, axis=1), np.argmax(y_pred3, axis=1))

print("Precisión del modelo en el conjunto de train: {:.2f}%".format(accuracy1 * 100))
print("Precisión del modelo en el conjunto de prueba: {:.2f}%".format(accuracy2 * 100))
# Fixed: this line reused the "conjunto de prueba" label, so the shirt & sweater
# result could not be told apart from the test-set result in the output.
print("Precisión del modelo en el conjunto de prueba de camisas y sweaters: {:.2f}%".format(accuracy3 * 100))

In [None]:
# Write the selected model to disk. The .h5 extension indicates the HDF5 format.
model_path = (
    '/home/andres_marin/Notebooks/Tesis/Deep_Learning_Test/Models/DeepNet_77_73.h5'
)
best_model.save(model_path)
print(f"Modelo 1 guardado en {model_path}")

# ShallowConvNet

In [None]:
import optuna
import tensorflow as tf
from EEGModels import EEGNet, ShallowConvNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np

# Fresh, empty ranking containers for this model family. `top_models` is filled
# by `objective`; `top_accuracies` is initialised but not used in the visible cells.
top_models, top_accuracies = [], []

def compute_metrics(y_true, y_pred):
    """Return ``(accuracy, specificity, sensitivity)`` for binary 0/1 labels.

    Class 1 is the positive class and class 0 the negative class.

    Changes from the original implementation:
      * The four confusion counts are computed with NumPy, so the function no
        longer relies on ``confusion_matrix`` being imported by another cell.
      * When only one class occurs, the original confusion matrix was 1x1 and
        could not be unpacked into four values; the missing counts are now 0.

    Args:
        y_true: Sequence or array of true labels (0 or 1).
        y_pred: Sequence or array of predicted labels (0 or 1), same length.

    Returns:
        Tuple ``(accuracy, specificity, sensitivity)``; a ratio whose
        denominator is zero is reported as 0.

    Raises:
        ValueError: If the inputs differ in length or contain values other
            than 0 and 1.
    """
    truth = np.asarray(y_true).ravel()
    guess = np.asarray(y_pred).ravel()
    if truth.shape != guess.shape:
        raise ValueError("y_true and y_pred must have the same number of elements")
    if not (np.isin(truth, (0, 1)).all() and np.isin(guess, (0, 1)).all()):
        raise ValueError("compute_metrics expects binary labels encoded as 0 and 1")

    true_pos = truth == 1
    pred_pos = guess == 1

    # Confusion counts, laid out as [TN, FP], [FN, TP].
    tn = int(np.sum(~true_pos & ~pred_pos))
    fp = int(np.sum(~true_pos & pred_pos))
    fn = int(np.sum(true_pos & ~pred_pos))
    tp = int(np.sum(true_pos & pred_pos))

    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    return accuracy, specificity, sensitivity

def objective(trial):
    """Optuna objective: mean 5-fold cross-validated accuracy for a sampled seed.

    The only sampled parameter is the integer ``seed``; it seeds NumPy,
    TensorFlow and the shuffling of the stratified folds. For each fold a
    ShallowConvNet is built, trained with early stopping, and scored with
    ``compute_metrics``.

    Reads the globals ``x_data``, ``y_data``, ``num_channels`` and
    ``num_samples``. Appends ``(model, avg_accuracy, avg_specificity,
    avg_sensitivity)`` to the global ``top_models`` list, which is kept sorted
    by mean accuracy (descending) and trimmed to five entries. The stored
    ``model`` is the one trained on the last fold.

    Args:
        trial: Trial object supplied by Optuna.

    Returns:
        Mean accuracy over the five folds.
    """
    # `StratifiedKFold` is used below but not imported by any visible cell;
    # importing it here keeps the function runnable on a fresh kernel.
    from sklearn.model_selection import StratifiedKFold

    # Sampled parameter.
    seed = trial.suggest_int('seed', 0, 1000000)

    # Seed the random number generators.
    np.random.seed(seed)
    tf.random.set_seed(seed)

    # Cross-validation splitter.
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

    accuracies = []
    specificities = []
    sensitivities = []

    # NOTE(review): `x_data` and `y_data` are not defined in any visible cell —
    # confirm which arrays they are meant to hold before running.
    for train_idx, test_idx in skf.split(x_data, y_data):
        x_fold_train, x_fold_test = x_data[train_idx], x_data[test_idx]
        y_fold_train, y_fold_test = y_data[train_idx], y_data[test_idx]

        # One-hot targets for the categorical cross-entropy loss.
        y_fold_train_cat = tf.keras.utils.to_categorical(y_fold_train, num_classes=2)
        y_fold_test_cat = tf.keras.utils.to_categorical(y_fold_test, num_classes=2)

        # Build and compile the model.
        model = ShallowConvNet(nb_classes=2, Chans=num_channels, Samples=num_samples)
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        # NOTE(review): the held-out fold is also the early-stopping validation
        # set, so the fold scores below are measured on the data that selected
        # the restored weights — consider a separate validation split.
        early_stopping = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)

        # Train the model.
        model.fit(x_fold_train, y_fold_train_cat, epochs=300, batch_size=32, verbose=0,
                  validation_data=(x_fold_test, y_fold_test_cat), callbacks=[early_stopping])

        # Score the held-out fold using the predicted class index.
        y_pred = np.argmax(model.predict(x_fold_test), axis=1)
        accuracy, specificity, sensitivity = compute_metrics(y_fold_test, y_pred)

        accuracies.append(accuracy)
        specificities.append(specificity)
        sensitivities.append(sensitivity)

    # Mean and standard deviation of each metric across folds.
    avg_accuracy = np.mean(accuracies)
    avg_specificity = np.mean(specificities)
    avg_sensitivity = np.mean(sensitivities)

    std_accuracy = np.std(accuracies)
    std_specificity = np.std(specificities)
    std_sensitivity = np.std(sensitivities)

    # Keep the five entries with the highest mean accuracy. `model` is the
    # network from the last fold of this trial.
    top_models.append((model, avg_accuracy, avg_specificity, avg_sensitivity))
    top_models.sort(key=lambda x: x[1], reverse=True)

    if len(top_models) > 5:
        top_models.pop()

    # Report the metrics of this trial.
    print(f"Seed: {seed} | Accuracy: {avg_accuracy:.3f} ± {std_accuracy:.3f} | "
          f"Specificity: {avg_specificity:.3f} ± {std_specificity:.3f} | "
          f"Sensitivity: {avg_sensitivity:.3f} ± {std_sensitivity:.3f}")

    return avg_accuracy

# Run 20 Optuna trials, maximising the value returned by `objective`.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)

# Report the seed used by the best-scoring trial.
best_trial = study.best_trial
best_seed = best_trial.params['seed']
print(f"La mejor semilla es: {best_seed}")

In [None]:
# Take the model from the entry at index 2 of `top_models`; each entry is
# (model, avg_accuracy, avg_specificity, avg_sensitivity).
# NOTE(review): `objective` keeps the list sorted by mean accuracy in descending
# order, so index 2 is the third-ranked entry — confirm this choice is intended.
best_model = top_models[2][0]

# Model outputs (one column per class) for the training split, the test split
# and the second half of the shirt & sweater data.
# NOTE(review): `x_test1` is not created in any visible cell — confirm its origin.
y_pred = best_model.predict(x_train)
y_pred2 = best_model.predict(x_test1)
y_pred3 = best_model.predict(x_div2)

# Accuracy: compare class indices obtained by argmax over the class axis.
accuracy1 = accuracy_score(np.argmax(y_train_cat, axis=1), np.argmax(y_pred, axis=1))
accuracy2 = accuracy_score(np.argmax(y_test1_cat, axis=1), np.argmax(y_pred2, axis=1))
accuracy3 = accuracy_score(np.argmax(y_div2_cat, axis=1), np.argmax(y_pred3, axis=1))

print("Precisión del modelo en el conjunto de train: {:.2f}%".format(accuracy1 * 100))
print("Precisión del modelo en el conjunto de prueba: {:.2f}%".format(accuracy2 * 100))
# Fixed: this line reused the "conjunto de prueba" label, so the shirt & sweater
# result could not be told apart from the test-set result in the output.
print("Precisión del modelo en el conjunto de prueba de camisas y sweaters: {:.2f}%".format(accuracy3 * 100))

In [None]:
# Write the selected model to disk. The .h5 extension indicates the HDF5 format.
model_path = (
    '/home/andres_marin/Notebooks/Tesis/Deep_Learning_Test/Models/ShallowConvNet.h5'
)
best_model.save(model_path)
print(f"Modelo 1 guardado en {model_path}")