# TCC - Análise de Sinais EEG

Este notebook apresenta um fluxo de trabalho completo para a análise de sinais de EEG, desde a leitura e pré-processamento dos dados até a construção, treinamento e avaliação de um modelo de aprendizado profundo para classificação.

## 1. Bibliotecas e Configurações Iniciais

Importação das bibliotecas necessárias e configuração do ambiente, incluindo a alocação de memória da GPU, se disponível.

In [None]:
#!rclone mount drive-thiago: ~/gdrive --daemon
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Input, Bidirectional, LSTM, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Optional GPU configuration: when at least one GPU is visible, cap how much
# memory TensorFlow may allocate on the first one.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # memory_limit=4000 — TensorFlow documents this value as megabytes.
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4000)]
        )
    except RuntimeError as e:
        # Best effort: report the failure and continue with default settings.
        print(e)

# Which dataset variant to load and how many target classes the model predicts.
DATASET = 'MU'
N_CLASSES = 10

# Per-variant signal layout: (timesteps per sample, channels per timestep).
_DATASET_SHAPES = {
    'MU': (440, 4),
    'EP': (256, 14),
}

try:
    N_TIMESTEPS, N_FEATURES = _DATASET_SHAPES[DATASET]
except KeyError:
    # Fail here with a clear message instead of a NameError much later, when
    # N_TIMESTEPS / N_FEATURES would first be used.
    raise ValueError(
        f"Unknown DATASET {DATASET!r}; expected one of {sorted(_DATASET_SHAPES)}"
    ) from None

## 2. Leitura dos Dados

Leitura do arquivo bruto e conversão dos dados em um formato adequado para o treinamento do modelo de deep learning. As amostras são convertidas em um array NumPy com shape `(n_amostras, N_TIMESTEPS, N_FEATURES)`.

In [None]:
# Prefer the local CSV cache; on a miss, read the training split from the
# Hugging Face hub and write the cache so later runs skip the download.
local_csv = f'{DATASET}_train.csv'
try:
    df = pd.read_csv(local_csv)
except FileNotFoundError:
    remote_csv = f"hf://datasets/DavidVivancos/MindBigData2022_MNIST_{DATASET}/train.csv"
    df = pd.read_csv(remote_csv)
    df.to_csv(local_csv, index=False)

# Discard every row whose label is -1.
df = df.loc[df['label'] != -1]

In [None]:
# Column 0 is taken as the label, every remaining column as signal values.
# NOTE(review): rows were filtered by the column *name* 'label' but the label
# is read here by *position* 0 — confirm 'label' is the first CSV column.
X = df.iloc[:, 1:].values
y = df.iloc[:, 0].values

# Fold each flat row into (N_TIMESTEPS, N_FEATURES).
# NOTE(review): this row-major reshape assumes the CSV columns are ordered
# time-major (all channels of t=0, then all channels of t=1, ...). If the file
# stores each channel's full series contiguously instead, it would need
# reshape(-1, N_FEATURES, N_TIMESTEPS).transpose(0, 2, 1) — check the column names.
X = X.reshape((-1, N_TIMESTEPS, N_FEATURES))
# Shift the labels so the smallest one becomes 0.
y = y - y.min() 

## 3. Pré-processamento e Filtragem dos Sinais

Aplicação de filtros para remover ruídos e artefatos dos sinais de EEG. As seguintes técnicas são utilizadas:
- **Filtro Butterworth Passa-Alta:** Para remover a flutuação da linha de base.
- **Filtro Notch:** Para remover a interferência da rede elétrica (60 Hz).
- **Denoising com Transformada Wavelet Discreta (DWT):** Para atenuar ruídos de alta frequência.

In [None]:
from scipy.signal import butter, filtfilt, iirnotch
import pywt

def butterworth_highpass(data, fs=220, cutoff=0.1, order=5):
    """Apply a zero-phase Butterworth high-pass filter along the last axis.

    Args:
        data: Array-like signal; filtering runs along its last axis.
        fs: Sampling frequency, in the same unit as ``cutoff``.
        cutoff: High-pass corner frequency.
        order: Butterworth filter order.

    Returns:
        The filtered signal as an ndarray with the same shape as ``data``.
    """
    # Function-scope import so this block does not depend on the cell-level
    # import line being edited.
    from scipy.signal import sosfiltfilt

    # The corner is a tiny fraction of Nyquist (0.1 / 110 with the defaults).
    # At that ratio a 5th-order (b, a) transfer function is numerically
    # fragile, so design the filter as second-order sections, as SciPy
    # recommends for general-purpose filtering.
    sos = butter(order, cutoff / (fs / 2), btype="high", analog=False, output="sos")
    # Forward-backward filtering keeps the result zero-phase, like filtfilt.
    return sosfiltfilt(sos, data)

def notch_filter(data, fs=220, freq=60.0, Q=30.0):
    """Suppress a narrow band around ``freq`` with a zero-phase IIR notch.

    Args:
        data: Array-like signal; filtering runs along its last axis.
        fs: Sampling frequency, in the same unit as ``freq``.
        freq: Centre frequency of the notch.
        Q: Quality factor passed to ``iirnotch``.

    Returns:
        The filtered signal as an ndarray with the same shape as ``data``.
    """
    nyquist = fs / 2
    # iirnotch is given the centre frequency as a fraction of Nyquist.
    numerator, denominator = iirnotch(w0=freq / nyquist, Q=Q)
    filtered = filtfilt(numerator, denominator, data)
    return filtered

def dwt_denoise_reconstruct(signal, wavelet='db4', level=3, mode='soft'):
    """Denoise a 1-D signal by thresholding its wavelet detail coefficients.

    The signal is decomposed with ``pywt.wavedec``. Each detail band is
    thresholded at ``sigma * sqrt(2 * ln(n))``, where ``sigma`` is that band's
    median absolute coefficient divided by 0.6745 and ``n`` is the signal
    length. The signal is then rebuilt with ``pywt.waverec``. The
    approximation band is never thresholded.

    Args:
        signal: 1-D sequence of samples.
        wavelet: Wavelet name passed to PyWavelets.
        level: Decomposition depth.
        mode: Thresholding mode passed to ``pywt.threshold``.

    Returns:
        ndarray holding the first ``len(signal)`` reconstructed samples. A
        band whose threshold is 0 is kept unchanged.
    """
    coeffs = pywt.wavedec(signal, wavelet=wavelet, level=level)
    n = len(signal)
    # coeffs[0] is the approximation band; only the detail bands are shrunk.
    for i, cd in enumerate(coeffs[1:], start=1):
        sigma = np.median(np.abs(cd)) / 0.6745 if cd.size > 0 else 0.0
        thresh = sigma * np.sqrt(2 * np.log(n)) if sigma > 0 else 0.0
        # A zero threshold means "nothing to remove". Skip the call instead of
        # thresholding at 0: soft shrinkage scales by (1 - thresh / |c|), which
        # is 0/0 for exactly-zero coefficients (e.g. a flat channel) and can
        # return NaN that then contaminates the whole reconstruction.
        if thresh > 0:
            coeffs[i] = pywt.threshold(cd, thresh, mode=mode)
    rec = pywt.waverec(coeffs, wavelet=wavelet)
    # waverec may return more samples than the input had; trim to length n.
    return np.asarray(rec[:n])

In [None]:
def _clean_channel(trace):
    """Run one 1-D channel through high-pass, notch and wavelet denoising, in that order."""
    trace = butterworth_highpass(trace, cutoff=0.1, order=5)
    trace = notch_filter(trace, freq=60.0, Q=30.0)
    return dwt_denoise_reconstruct(trace, wavelet='db4', level=3, mode='soft')


n_samples, timesteps, n_channels = X.shape

# Float buffer with X's shape; every (sample, channel) series is filtered
# independently and written back into its slot.
X_filtered = np.zeros_like(X, dtype=float)
for i, ch in np.ndindex(n_samples, n_channels):
    X_filtered[i, :, ch] = _clean_channel(X[i, :, ch].astype(float))

# From here on X refers to the filtered signals.
X = X_filtered

## 4. Normalização

Os dados são normalizados utilizando Z-score seguido por `MinMaxScaler` para escalar os valores entre 0 e 1.

In [None]:
def normalize_teste(X: np.ndarray):
    """Z-score ``X`` along axis 0, then min-max scale the result.

    Mean and standard deviation are taken along axis 0, and 1e-8 is added to
    the standard deviation to avoid dividing by zero. The standardized array
    is then passed through a default ``MinMaxScaler``.

    Args:
        X: Input array (presumably 2-D, as ``MinMaxScaler`` is fitted on it
            directly — confirm against callers).

    Returns:
        The scaled array returned by ``MinMaxScaler.fit_transform``.
    """
    column_mean = X.mean(axis=0)
    column_std = X.std(axis=0)
    standardized = (X - column_mean) / (column_std + 1e-8)
    return MinMaxScaler().fit_transform(standardized)

def normalize(X: np.ndarray):
    """Standardize each channel, then rescale it to the range [0, 1].

    All statistics are pooled per channel over every sample and timestep
    (axes 0 and 1). The z-score step is followed by a min-max rescale that
    uses a single minimum and maximum for the whole channel.

    Args:
        X: Array indexed as (sample, timestep, channel).

    Returns:
        A new float array with the same shape as ``X``. A channel that is
        constant across the whole array maps to 0. ``X`` is not modified.
    """
    mu = X.mean(axis=(0, 1), keepdims=True)
    sigma = X.std(axis=(0, 1), keepdims=True)
    sigma[sigma == 0] = 1.0  # constant channel: avoid dividing by zero

    X_z = (X - mu) / sigma

    # One min/max per channel, applied to all samples in a single vectorized
    # step instead of one scaler call per sample per channel.
    lowest = X_z.min(axis=(0, 1), keepdims=True)
    span = X_z.max(axis=(0, 1), keepdims=True) - lowest
    span[span == 0] = 1.0  # zero range: keep the channel at 0 rather than NaN

    return (X_z - lowest) / span

In [None]:
# Replace X with the output of normalize() (per-channel z-score, then min-max).
X = normalize(X)

## 5. Construção do Modelo

Definição da arquitetura do modelo, que consiste em uma rede neural recorrente com camadas LSTM bidirecionais, dropout para regularização e camadas densas para a classificação final.

In [None]:
def create_simple_model():
    """Build and compile the small bidirectional-LSTM classifier.

    Architecture: input of shape (N_TIMESTEPS, N_FEATURES), then BiLSTM(32)
    returning the full sequence, BiLSTM(16) returning only its final output,
    Dense(64, ELU) and a softmax layer over N_CLASSES.

    Returns:
        A ``tf.keras.Model`` compiled with Adam (learning rate 0.001), sparse
        categorical cross-entropy loss and the accuracy metric.
    """
    inputs = Input(shape=(N_TIMESTEPS, N_FEATURES))

    hidden = Bidirectional(LSTM(units=32, return_sequences=True))(inputs)
    hidden = Bidirectional(LSTM(units=16, return_sequences=False))(hidden)
    hidden = Dense(64, activation='elu')(hidden)
    outputs = Dense(N_CLASSES, activation='softmax')(hidden)

    model = tf.keras.Model(inputs, outputs)

    optimizer = Adam(learning_rate=0.001)
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

def create_model():
    """Build and compile the deep bidirectional-LSTM classifier.

    Three BiLSTM layers sized N_TIMESTEPS, N_TIMESTEPS // 2 and
    N_TIMESTEPS // 4, each followed by Dropout(0.1). After them come Flatten,
    Dense(128, ELU) and a softmax layer over N_CLASSES.

    Returns:
        A ``tf.keras.Model`` compiled with Adam (learning rate 0.00003125),
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    inputs = Input(shape=(N_TIMESTEPS, N_FEATURES))

    # (LSTM units, return_sequences) for each recurrent stage, in order.
    recurrent_stages = (
        (N_TIMESTEPS, True),
        (N_TIMESTEPS // 2, True),
        (N_TIMESTEPS // 4, False),
    )

    hidden = inputs
    for units, keep_sequence in recurrent_stages:
        hidden = Bidirectional(LSTM(units=units, return_sequences=keep_sequence))(hidden)
        hidden = Dropout(0.1)(hidden)

    # The last stage uses return_sequences=False, so its output is already
    # flat per sample; Flatten is kept so the layer stack stays the same.
    hidden = Flatten()(hidden)
    hidden = Dense(128, activation='elu')(hidden)
    outputs = Dense(N_CLASSES, activation='softmax')(hidden)

    model = tf.keras.Model(inputs, outputs)

    optimizer = Adam(learning_rate=0.00003125)
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Instantiate the deep architecture (create_model, not create_simple_model)
# and print its layer summary.
model = create_model()

model.summary()

## 6. Treinamento do Modelo

Treinamento do modelo com os dados preparados. São utilizados callbacks para `EarlyStopping` (interromper o treino se a performance não melhorar) e `ModelCheckpoint` (salvar o melhor modelo encontrado durante o treino).

In [None]:
# Stop training once validation accuracy has not improved for `patience`
# epochs.
# NOTE(review): patience (10) equals the epoch budget (10) passed to fit()
# below, so this callback can never trigger an early stop — lower patience or
# raise epochs if early stopping is meant to be active.
early_stop = EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True
)

# Save the model to 'melhor_modelo.keras' whenever validation accuracy reaches
# a new maximum; the evaluation cell reloads this file.
checkpoint = ModelCheckpoint(
    'melhor_modelo.keras',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=0
)

# Train on X / y, holding out 20% of the data for validation.
# NOTE(review): Keras documents validation_split as taking the *last* fraction
# of the arrays, before any shuffling. If the CSV rows are ordered (e.g. by
# label or session), prefer an explicit shuffled/stratified split — confirm.
history = model.fit(
    X, y,
    validation_split=0.2,
    epochs=10, 
    batch_size=64,
    callbacks=[early_stop, checkpoint],
    verbose=1
)

## 7. Criação de Gráficos

In [None]:
import matplotlib.pyplot as plt

# Per-epoch training curves recorded by model.fit().
accuracy = history.history["accuracy"]
val_accuracy = history.history["val_accuracy"]
loss = history.history["loss"]
val_loss = history.history["val_loss"]
epochs = range(1, len(accuracy) + 1)

# (subplot position, training series, validation series, colour code, metric name)
panels = (
    (1, accuracy, val_accuracy, "b", "Accuracy"),
    (2, loss, val_loss, "r", "Loss"),
)

plt.figure(figsize=(12, 5))

# Both panels share the same layout, so draw them in one loop.
for position, train_values, val_values, colour, metric in panels:
    plt.subplot(1, 2, position)
    plt.plot(epochs, train_values, f"{colour}o-", label=f"Training {metric.lower()}")
    plt.plot(epochs, val_values, f"{colour}-", label=f"Validation {metric.lower()}")
    plt.title(f"Training and Validation {metric}")
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend()
    plt.grid(True)

# The accented character in the file name was mis-encoded; it is now written
# as proper UTF-8.
# NOTE(review): the name says "(Simple Model)" but the notebook instantiates
# create_model() — confirm which label is intended.
PATH = "(Simple Model) - Gráfico 4 - Preprocessing and Normalization.png"
plt.suptitle("Training Results with Preprocessing and Normalization", fontsize=14)
# Leave room at the top of the figure for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.savefig(PATH, dpi=300)
plt.close()

print(f"Figura salva como: {PATH}")

## 8. Avaliação do Modelo

Avaliação da performance do modelo treinado no conjunto de teste. São calculadas métricas como acurácia, precisão, recall e F1-score, além da exibição de um relatório de classificação detalhado por classe.

In [None]:
def validate(model, X_test, y_test):
    """Print classification metrics for ``model`` on a held-out set.

    Predicted classes are the argmax over axis 1 of ``model.predict(X_test)``.
    The function prints accuracy, macro-averaged precision, recall and F1,
    followed by scikit-learn's per-class classification report.

    Args:
        model: Object exposing ``predict(X_test)`` that returns per-class
            scores with classes along axis 1.
        X_test: Inputs forwarded unchanged to ``model.predict``.
        y_test: True class indices, compared against the argmax predictions.

    Returns:
        None. All results are printed.
    """
    y_pred_probs = model.predict(X_test)
    y_pred = np.argmax(y_pred_probs, axis=1)

    acc = accuracy_score(y_test, y_pred)
    # zero_division=0 scores a class that is never predicted (or never
    # present) as 0, without an UndefinedMetricWarning per metric call.
    prec = precision_score(y_test, y_pred, average='macro', zero_division=0)
    rec = recall_score(y_test, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)

    # The user-facing strings below had mis-encoded accents/emoji; they are
    # now written as proper UTF-8.
    print("\n📊 Desempenho no conjunto de teste:")
    print(f"Acurácia: {acc:.4f}")
    print(f"Precisão (macro): {prec:.4f}")
    print(f"Recall (macro): {rec:.4f}")
    print(f"F1-score (macro): {f1:.4f}")

    print("\nRelatório por classe:")
    print(classification_report(y_test, y_pred, digits=4, zero_division=0))

In [None]:
# Load the test split and drop rows labelled -1, as done for training.
df = pd.read_csv(f"hf://datasets/DavidVivancos/MindBigData2022_MNIST_{DATASET}/test.csv")

df = df[df['label'] != -1]

X_test = df.iloc[:, 1:].values
# NOTE(review): training labels were shifted by their minimum; after dropping
# -1 that offset is presumably 0, so test labels are used as-is — confirm.
y_test = df.iloc[:, 0].values

# The model was trained on (N_TIMESTEPS, N_FEATURES) inputs that had been
# filtered and normalized. The test set must go through the same steps;
# feeding the raw flat rows does not even match the model's input shape.
X_test = X_test.reshape((-1, N_TIMESTEPS, N_FEATURES))

X_test_filtered = np.zeros_like(X_test, dtype=float)
for i in range(X_test.shape[0]):
    for ch in range(X_test.shape[2]):
        signal = X_test[i, :, ch].astype(float)
        signal = butterworth_highpass(signal, cutoff=0.1, order=5)
        signal = notch_filter(signal, freq=60.0, Q=30.0)
        signal = dwt_denoise_reconstruct(signal, wavelet='db4', level=3, mode='soft')
        X_test_filtered[i, :, ch] = signal

# NOTE(review): normalize() derives its statistics from the array it is given,
# so the test set is scaled with its own mean/std/min/max, not the training
# ones. Reusing the training statistics would be stricter, but normalize()
# does not expose them.
X_test = normalize(X_test_filtered)

# Evaluate the best checkpoint written during training.
model = load_model('melhor_modelo.keras')
validate(model, X_test, y_test)