In [None]:
import numpy as np
import importlib
import pandas as pd
import tensorflow as ctf
import matplotlib.pyplot as plt
import seaborn as sns
import os
import shutil
import mne
import warnings
import datetime
from process_data import *




from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import TensorBoard
from keras.constraints import max_norm

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn import preprocessing, svm
from scipy.fft import rfft, rfftfreq

# Seed NumPy's global random generator so repeated runs draw the same numbers.
np.random.seed(23)

# Only let MNE messages of level "warning" or higher through (hides info output).
mne.set_log_level(verbose='warning')

# Hide DeprecationWarning and FutureWarning messages for the rest of the session.
warnings.simplefilter("ignore", category=DeprecationWarning)
warnings.simplefilter("ignore", category=FutureWarning)


In [None]:

# Root directory handed to the data-loading call below; change it to wherever
# the dataset lives on this machine.
root_dir = "inner-speech-recognition"

In [None]:

# Parameters for this run.
N_s = 1                    # subject number, 1-10
fs = 256                   # sampling frequency passed to the windowing helper
Tstart, Tend = 1.5, 3.5    # window bounds passed as t_start / t_end (presumably seconds -- TODO confirm)

# Load the EEG recordings (X) and trial annotations (Y) of the chosen subject.
# extract_data_from_subject presumably comes from the process_data star import.
X, Y = extract_data_from_subject(root_dir, N_s, datatype="eeg")

# Restrict the data to the [Tstart, Tend] window; the raw X is kept untouched.
Subj = select_time_window(X=X, t_start=Tstart, t_end=Tend, fs=fs)



In [None]:
# Conditions to compare, and the class selection that belongs to each condition.
Conditions = [["Inner"]]
Classes = [["all"]]

print("Shape of X before:", X.shape)
print("Shape of Y before:", Y.shape)

# Filter the windowed data (Subj) and its annotations down to the chosen
# conditions/classes.
# NOTE(review): X and Y are rebound to the filtered arrays here, so running this
# cell a second time feeds the already-filtered Y back in -- re-run the loading
# cell first if this cell needs to be executed again.
X, Y = transform_for_classificator(X=Subj, Y=Y, classes=Classes, conditions=Conditions)

print("Shape of X after:", X.shape)
print("Shape of Y after:", Y.shape)

In [None]:

# Scale the signals by 1e6 (conversion to microvolts).
X_resize = X * 1_000_000

# Column 1 of Y is used as the per-trial integer class code.
data_labels = Y[:, 1]

# For the SVM, translate the integer class codes into readable direction names.
trials_dict = dict(enumerate(('up', 'down', 'right', 'left')))
trial_labels = np.array([trials_dict[class_code] for class_code in data_labels])

# Show the integer codes as the cell output.
data_labels

In [None]:
# Plots EEG signals from the highest-variance channels, one subplot per label,
# either averaged over all trials of that label or taken from its first trial.
def plot_eeg_signals(X_resize: np.ndarray, Y: np.ndarray, fs: int, top_channels: int = 10, average_trials: bool = True):
    """
    Plot EEG signals per label for the channels with the largest variance.

    Parameters:
    - X_resize (np.ndarray): EEG data of shape (trials, channels, time).
    - Y (np.ndarray): Per-trial labels. Either a 1-D vector with one label per
      trial, or a 2-D array whose column 1 holds the label of each trial.
    - fs (int): Sampling frequency of the EEG data; used to build the time axis.
    - top_channels (int): Number of channels to plot, picked by highest variance
      computed over all trials and time points. Must be at least 1.
    - average_trials (bool): If True, plot the mean over the trials of each label;
      if False, plot only the first trial of each label.

    Raises:
    - ValueError: If top_channels is smaller than 1.
    """
    if top_channels < 1:
        # A count of 0 would turn the [-top_channels:] slice below into "every channel".
        raise ValueError("top_channels must be at least 1")

    # Sample k is taken at k / fs seconds, so consecutive samples are exactly 1 / fs apart.
    n_samples = X_resize.shape[2]
    time = np.arange(n_samples) / fs

    # Take the labels from the Y argument only, so the function does not depend
    # on any notebook-level variable.
    Y = np.asarray(Y)
    Y_labels = Y if Y.ndim == 1 else Y[:, 1]
    unique_labels = np.unique(Y_labels)

    # One subplot row per label, sharing the time axis.
    fig, axes = plt.subplots(len(unique_labels), 1, figsize=(12, len(unique_labels) * 4), sharex=True)

    # With a single row, subplots returns a bare Axes; wrap it so the loop below works.
    if len(unique_labels) == 1:
        axes = [axes]

    # argsort is ascending, so the last `top_channels` entries are the highest-variance channels.
    top_channels_idx = np.argsort(np.var(X_resize, axis=(0, 2)))[-top_channels:]

    for ax, label in zip(axes, unique_labels):
        # Trials of the current label, restricted to the selected channels.
        X_label = X_resize[Y_labels == label][:, top_channels_idx, :]

        # Either the mean over those trials or just the first one: shape (channels, time).
        traces = X_label.mean(axis=0) if average_trials else X_label[0]

        for channel_idx, trace in zip(top_channels_idx, traces):
            # Channel numbers in the legend are 1-based.
            ax.plot(time, trace, label=f"Channel {channel_idx + 1}")

        ax.set_title(f"EEG Signals for Label: {label}")
        ax.set_ylabel("Microvolts")
        ax.grid(True)
        ax.legend()

    # The x axis is shared, so only the bottom subplot gets the axis label.
    axes[-1].set_xlabel("Time (s)")
    plt.tight_layout()
    plt.show()

# Draw the five highest-variance channels, averaged over the trials of each label.
plot_eeg_signals(X_resize=X_resize, Y=Y, fs=fs, top_channels=5, average_trials=True)

NameError: name 'np' is not defined

In [None]:
# Plot the EEG data of every channel for the first trial on a single set of axes.
# Iterating over the rows of X_resize[0] visits each channel exactly once,
# including the last one.
for channel_trace in X_resize[0]:
    plt.plot(channel_trace);
plt.title('EEG data, all channels for one trial')
plt.ylabel('Voltage (microV)')
plt.xlabel('Sample Number')
plt.show()

In [None]:
# FFT (used for input to SVM, other networks can just use X_resize or other formats)

# Frequency band (Hz) kept as SVM features; both edges are inclusive.
FFT_BAND_HZ = (4, 40)

# One-sided FFT along the time axis and the frequency (Hz) of each FFT bin.
N = X_resize.shape[2]
samples_fft = rfft(X_resize, axis=2)
samples_freq = rfftfreq(N, 1 / fs)

# Average the complex spectra across channels.
# NOTE(review): averaging complex values lets channels with opposite phase cancel
# each other; confirm this is intended rather than averaging per-channel magnitudes.
avg_fft = np.mean(samples_fft, axis=1)

# Magnitude of the averaged spectrum.
mag_fft = np.abs(avg_fft)

# Convert to dB (20 * log10 of the magnitude). The magnitude is floored at the
# smallest positive normal float so an exactly-zero bin yields a finite value
# instead of -inf, which the classifier could not consume.
db_fft = 20 * np.log10(np.maximum(mag_fft, np.finfo(mag_fft.dtype).tiny))

# Select the band by frequency value rather than by fixed bin index, so the
# selection stays correct if the window length or sampling rate changes.
band_mask = (samples_freq >= FFT_BAND_HZ[0]) & (samples_freq <= FFT_BAND_HZ[1])
data_fft = db_fft[:, band_mask]
data_freq = samples_freq[band_mask]



In [None]:
# Hold out 20% of the trials for testing.
# - stratify keeps the class proportions the same in the train and test parts.
# - random_state pins the shuffle, so re-running this cell reproduces the same
#   split regardless of how much of the global NumPy random stream was consumed.
x_train_svm, x_test_svm, y_train_svm, y_test_svm = train_test_split(
    data_fft,
    trial_labels,
    test_size=0.2,
    shuffle=True,
    stratify=trial_labels,
    random_state=23,
)

# Demo SVM model with scikit-learn's default hyper-parameters.
model = svm.SVC()
model.fit(x_train_svm, y_train_svm)

# Evaluate the model on the held-out trials.
predictions = model.predict(x_test_svm)
print(classification_report(y_test_svm, predictions))


In [None]:

# Evaluate the fitted model on the held-out split and print the per-class report.
# NOTE(review): this repeats the evaluation already printed by the training cell.
predictions = model.predict(x_test_svm)
print(classification_report(y_true=y_test_svm, y_pred=predictions))