In [22]:
import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt, stft
from pathlib import Path

#### MERGE CODE WITH SESSION ID

In [3]:
def combine_excel_sheets_to_csv_eeg_safe(excel_file_path, output_csv_path):

    xls = pd.ExcelFile(excel_file_path)
    sheet_names = xls.sheet_names

    # Read first sheet with header
    df_ref = pd.read_excel(excel_file_path, sheet_name=sheet_names[0])
    column_names = df_ref.columns.tolist()

    all_data = []

    for sheet_idx, sheet_name in enumerate(sheet_names):
        df = pd.read_excel(
            excel_file_path,
            sheet_name=sheet_name,
            header=0 if sheet_idx == 0 else None
        )

        if df.shape[1] != len(column_names):
            print(f"Skipping {sheet_name}: column mismatch")
            continue

        df.columns = column_names

        # Force numeric EEG
        df = df.apply(pd.to_numeric, errors="coerce")
        df = df.dropna()

        # Add session marker
        df["session_id"] = sheet_idx

        all_data.append(df)

        print(f"Added {sheet_name}: {len(df)} samples")

    combined_df = pd.concat(all_data, ignore_index=True)
    combined_df.to_csv(output_csv_path, index=False)

    print(f"Final shape: {combined_df.shape}")

In [7]:
RAW_DATA_PATH = Path("./Data_raw/")

INPUT_FILE = "right eye open_1.xlsx"
OUTPUT_FILE = "combined.csv"

combine_excel_sheets_to_csv_eeg_safe(RAW_DATA_PATH / INPUT_FILE, RAW_DATA_PATH / OUTPUT_FILE)

Added Sheet1: 169500 samples
Added Sheet2: 30000 samples
Added Sheet3: 30000 samples
Added Sheet4: 30000 samples
Final shape: (259500, 22)


#### LOADING MERGED CSV

In [8]:
CSV_FILE = "combined.csv"   # your merged file
df = pd.read_csv(RAW_DATA_PATH / CSV_FILE)

print(df.shape)
print(df.columns)

(259500, 22)
Index(['Fp1-A1', 'Fp2-A2', 'F3-A1', 'F4-A2', 'C3-A1', 'C4-A2', 'P3-A1',
       'P4-A2', 'O1-A1', 'O2-A2', 'F7-A1', 'F8-A2', 'T3-A1', 'T4-A2', 'T5-A1',
       'T6-A2', 'Fz-Aav', 'Cz-Aav', 'Pz-Aav', 'T1-A1', 'T2-A2', 'session_id'],
      dtype='object')


#### SEPARATE EEG CHANNELS AND SESSION ID

In [9]:
EEG_CHANNELS = [
    "Fp1-A1", "Fp2-A2",
    "P3-A1", "P4-A2", "Pz-Aav",
    "O1-A1", "O2-A2",
    "Cz-Aav"
]

In [11]:
eeg = df[EEG_CHANNELS].values.T      # [C, T]
session_ids = df["session_id"].values

print(eeg.shape)  # [8, 259500]

(8, 259500)


#### BAND-PASS FILTERING Theta + Alpha (4–13 Hz)

In [13]:
from scipy.signal import butter, filtfilt

def bandpass_filter(eeg, fs, low=4, high=13, order=4):
    nyq = 0.5 * fs
    b, a = butter(order, [low / nyq, high / nyq], btype="band")
    return filtfilt(b, a, eeg, axis=1)

In [14]:
FS = 125  # confirm this
eeg = bandpass_filter(eeg, fs=FS)

#### SEGMENT INTO TRIALS (SESSION-AWARE)

In [15]:
def segment_trials_sessionwise(eeg, session_ids, fs, window_sec=10):
    samples_per_trial = fs * window_sec
    trials = []

    for sid in np.unique(session_ids):
        idx = np.where(session_ids == sid)[0]
        session_eeg = eeg[:, idx]

        total = session_eeg.shape[1]
        for start in range(0, total - samples_per_trial, samples_per_trial):
            trials.append(session_eeg[:, start:start + samples_per_trial])

    return np.stack(trials)

In [16]:
trials = segment_trials_sessionwise(
    eeg, session_ids, fs=FS, window_sec=10
)

print("Trials:", trials.shape)

Trials: (204, 8, 1250)


#### TIME–FREQUENCY TRANSFORM (STFT)

In [17]:
from scipy.signal import stft

def compute_stft_trials(trials, fs):
    tf_trials = []

    for trial in trials:
        ch_tf = []
        for ch in trial:
            f, t, Z = stft(
                ch,
                fs=fs,
                nperseg=fs,        # 1 second window
                noverlap=fs // 2   # 50% overlap
            )

            # Keep theta + alpha
            mask = (f >= 4) & (f <= 13)
            ch_tf.append(np.abs(Z[mask]))

        tf_trials.append(ch_tf)

    return np.array(tf_trials)

In [18]:
tf_trials = compute_stft_trials(trials, fs=FS)
print("TF trials:", tf_trials.shape)

TF trials: (204, 8, 10, 21)


#### CONVERT TO **PyTorch** TENSOR

In [23]:
import torch

In [19]:
X = torch.tensor(tf_trials, dtype=torch.float32)
print("Final input to CNN–LSTM:", X.shape)

Final input to CNN–LSTM: torch.Size([204, 8, 10, 21])


#### SAVING TENSORS AND METADATA

In [20]:
torch.save(X, "X_eeg_tf.pt")
print("Saved X_eeg_tf.pt with shape:", X.shape)

Saved X_eeg_tf.pt with shape: torch.Size([204, 8, 10, 21])


In [21]:
metadata = {
    "sampling_rate": 125,
    "channels": [
        "Fp1-A1", "Fp2-A2",
        "P3-A1", "P4-A2", "Pz-Aav",
        "O1-A1", "O2-A2",
        "Cz-Aav"
    ],
    "freq_band": "4–13 Hz",
    "window_sec": 10,
    "input_shape": X.shape
}

torch.save(metadata, "eeg_metadata.pt")