<a href="https://colab.research.google.com/github/AlperYildirim1/The-Pathogenic-Echo-Hypotesis/blob/main/Kauvar.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install awscli

In [None]:
# Mirror OpenNeuro dataset ds003004 from the public S3 bucket into ./ds003004.
# --no-sign-request sends unsigned (anonymous) requests, so no AWS credentials are needed.
!aws s3 sync s3://openneuro.org/ds003004 ./ds003004 --no-sign-request

In [None]:
# =============================================================
# EXPLORATION SCRIPT for ds003004
# Goal: Understand the data structure and event timings.
# =============================================================

import pandas as pd
import os


dataset_path = './ds003004'
subject = 'sub-01'
events_file_path = os.path.join(dataset_path, subject, 'eeg', f'{subject}_task-ImaginedEmotion_events.tsv')


def select_events(events, label, column='value'):
    """Return the rows of ``events`` whose ``column`` equals ``label``.

    Parameters
    ----------
    events : pandas.DataFrame
        Events table as read from the ``*_events.tsv`` file.
    label : str
        Event name to keep (e.g. ``'sad'``).
    column : str, default ``'value'``
        Column holding the event names. In this dataset's events file the
        names are stored in ``value``; ``trial_type`` is NaN in the rows
        printed by this cell.

    Returns
    -------
    pandas.DataFrame
        The matching rows (empty if ``label`` never occurs).
    """
    return events[events[column] == label]


print(f"Attempting to load events file from: {events_file_path}")

try:

    events_df = pd.read_csv(events_file_path, sep='\t')

    print("\n✅ Successfully loaded the events file. Here are the first 15 rows:")
    print("--------------------------------------------------------------------")

    print(events_df.head(15).to_string())
    print("--------------------------------------------------------------------")

    # The event names live in the 'value' column, so that is the column worth
    # enumerating (the previous version listed 'trial_type', which is NaN in
    # the rows shown above).
    print("\nLet's look at the unique event types (value column):")
    print(events_df['value'].unique())
    print("\n--------------------------------------------------------------------")

    # The label is 'sad' (not 'sadness'); filtering trial_type == 'sadness'
    # could only ever produce an empty table.
    print("\nNow, let's look at the full table for a single emotion, for example 'sad':")
    sadness_events = select_events(events_df, 'sad')
    print(sadness_events.to_string())


except FileNotFoundError:
    print(f"\n❌ FILE NOT FOUND. Could not find the events.tsv file at the specified path.")
    print("Please make sure you have run the download command and the path is correct.")
except Exception as e:
    # Exploration cell: report any other problem (e.g. a missing column)
    # instead of stopping the notebook.
    print(f"\n❌ AN ERROR OCCURRED: {e}")

Attempting to load events file from: ./ds003004/sub-01/eeg/sub-01_task-ImaginedEmotion_events.tsv

✅ Successfully loaded the events file. Here are the first 15 rows:
--------------------------------------------------------------------
         onset  duration  sample  trial_type  response_time  stim_file                        value  HED
0     6.343750       0.0     NaN         NaN            NaN        NaN          InitialInstructions  NaN
1     6.734375       0.0     NaN         NaN            NaN        NaN             prebase_instruct  NaN
2   117.503906       0.0     NaN         NaN            NaN        NaN                      prebase  NaN
3   237.503906       0.0     NaN         NaN            NaN        NaN                         exit  NaN
4   237.507812       0.0     NaN         NaN            NaN        NaN  FeelingItInstructionsButton  NaN
5   237.781250       0.0     NaN         NaN            NaN        NaN        InstructionsForEnding  NaN
6   304.789062       0.0     N

In [None]:
# =============================================================
# TEMPORAL GENERALIZATION SCRIPT for ds003004
# Goal: Train a model on Phase 1 and test it on Phase 2.
# =============================================================

# --- STEP 0: INSTALL MNE-PYTHON ---
!pip install mne -q

import mne
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# --- STEP 1: DEFINE FILE PATHS ---
# The recording and its events table sit side by side in
# <dataset>/<subject>/eeg/ and share the same filename stem.
dataset_path = './ds003004'
subject = 'sub-01'
eeg_dir = os.path.join(dataset_path, subject, 'eeg')
file_stem = f'{subject}_task-ImaginedEmotion'
eeg_file_path = os.path.join(eeg_dir, file_stem + '_eeg.set')
events_file_path = os.path.join(eeg_dir, file_stem + '_events.tsv')

print("Loading EEG data from: {}".format(eeg_file_path))
print("Loading events from: {}".format(events_file_path))

try:
    # --- STEP 2: LOAD EEG DATA AND EVENTS ---
    raw = mne.io.read_raw_eeglab(eeg_file_path, preload=True)
    events_df = pd.read_csv(events_file_path, sep='\t')

    # High-pass only: 1 Hz lower cutoff, no upper cutoff.
    raw.filter(l_freq=1.0, h_freq=None, n_jobs=-1)

    print("\n✅ Data and events loaded successfully.")

    sfreq = raw.info['sfreq']
    # One-second Welch windows give 1 Hz resolution at any sampling rate
    # (this equals the previously hard-coded 256 when sfreq is 256 Hz).
    n_fft = int(round(sfreq))
    # psd_array_welch rejects signals shorter than n_fft samples, so phases
    # shorter than one window are skipped instead of aborting the whole run.
    min_phase_sec = n_fft / sfreq

    # --- STEP 3: PARSE EVENTS TO DEFINE TRIALS AND PHASES ---
    print("--- Parsing events to define emotion trials and phases ---")

    emotion_list = [
        'awe', 'frustration', 'joy', 'anger', 'happy', 'sad',
        'love', 'fear', 'compassion', 'jealousy', 'content',
        'grief', 'relief', 'disgust', 'excitement'
    ]
    negative_emotions = {'frustration', 'anger', 'sad', 'fear', 'jealousy', 'grief', 'disgust'}

    def first_onset(names, after=None):
        """Onset (s) of the first listed event whose 'value' is in ``names``.

        Only events with onset strictly greater than ``after`` are considered
        when ``after`` is given. Returns None if there is no such event.
        """
        mask = events_df['value'].isin(names)
        if after is not None:
            mask = mask & (events_df['onset'] > after)
        onsets = events_df.loc[mask, 'onset']
        return None if onsets.empty else onsets.iloc[0]

    def extract_phase(t_onset, duration, code):
        """Cut one segment of ``duration`` seconds starting at ``t_onset``.

        Returns the data array from a single-event ``mne.Epochs`` object, or
        None when MNE drops the epoch (e.g. it runs past the recording).
        """
        # Round (not truncate) to the nearest sample: onsets read from the TSV
        # can land a hair below an integer sample (117.503906 s * 256 Hz =
        # 30080.9999...). MNE event samples are counted from raw.first_samp.
        sample = int(round(t_onset * sfreq)) + raw.first_samp
        events = np.array([[sample, 0, code]])
        epochs = mne.Epochs(raw, events, event_id=code, tmin=0, tmax=duration,
                            preload=True, baseline=None, verbose=False)
        if len(epochs) == 0:
            return None
        return epochs.get_data()

    phase1_epochs_list = []
    phase2_epochs_list = []
    labels_list = []
    skipped = []  # (emotion, reason) pairs, reported after the loop

    for code, emotion in enumerate(emotion_list, start=1):
        t_start = first_onset([emotion])
        if t_start is None:
            skipped.append((emotion, "no event with this name"))
            continue

        t_button1 = first_onset(['press1'], after=t_start)
        if t_button1 is None:
            skipped.append((emotion, "no 'press1' after the emotion marker"))
            continue

        t_end = first_onset(['relax', 'exit'], after=t_button1)
        if t_end is None:
            skipped.append((emotion, "no 'relax'/'exit' after 'press1'"))
            continue

        # A trial must finish before any other emotion starts. Without this
        # bound, a trial lacking its own 'press1' (or end marker) would be
        # paired with a later trial's marker and span several emotions.
        other_emotions = [name for name in emotion_list if name != emotion]
        t_next_emotion = first_onset(other_emotions, after=t_start)
        if t_next_emotion is not None and t_end > t_next_emotion:
            skipped.append((emotion, "markers run into the next emotion"))
            continue

        # Phase 1: emotion marker -> press1.  Phase 2: press1 -> relax/exit.
        phase1_duration = t_button1 - t_start
        phase2_duration = t_end - t_button1
        if min(phase1_duration, phase2_duration) < min_phase_sec:
            skipped.append((emotion, "a phase is shorter than one Welch window"))
            continue

        data_p1 = extract_phase(t_start, phase1_duration, code)
        data_p2 = extract_phase(t_button1, phase2_duration, code)
        if data_p1 is None or data_p2 is None:
            skipped.append((emotion, "epoch dropped by MNE"))
            continue

        phase1_epochs_list.append(data_p1)
        phase2_epochs_list.append(data_p2)
        labels_list.append('Negative' if emotion in negative_emotions else 'Positive')

    y_labels = np.array(labels_list)

    print(f"\n✅ Successfully created {len(y_labels)} epochs for Phase 1 and Phase 2.")
    for emotion, reason in skipped:
        print(f"   Skipped '{emotion}': {reason}")

    # Validate before the (comparatively expensive) spectral step.
    if len(y_labels) < 5:
        raise ValueError("Not enough valid trials found to perform classification.")
    classes, class_counts = np.unique(y_labels, return_counts=True)
    if len(classes) < 2:
        raise ValueError("Only one valence class is present; classification is not possible.")

    # --- STEP 4: CALCULATE FEATURES (PSD) ---
    print("--- Calculating Power Spectral Density (PSD) features ---")

    def compute_psd_for_list(epoch_list, sfreq, n_fft=None):
        """Turn each segment into one flat vector of Welch PSD values (1-45 Hz).

        Parameters
        ----------
        epoch_list : list of ndarray
            Segments as returned by ``Epochs.get_data()``; they may differ in
            length because Welch averaging is applied per segment.
        sfreq : float
            Sampling frequency in Hz.
        n_fft : int | None
            FFT length in samples. None means one second (``round(sfreq)``).

        Returns
        -------
        ndarray
            One row per segment, holding the flattened PSD array.
        """
        if n_fft is None:
            n_fft = int(round(sfreq))
        psd_features = []
        for epoch_data in epoch_list:
            psd, _freqs = mne.time_frequency.psd_array_welch(
                epoch_data, sfreq=sfreq, fmin=1, fmax=45, n_fft=n_fft,
                average='mean', verbose=False)
            psd_features.append(psd.flatten())
        return np.array(psd_features)

    X_phase1 = compute_psd_for_list(phase1_epochs_list, sfreq, n_fft=n_fft)
    X_phase2 = compute_psd_for_list(phase2_epochs_list, sfreq, n_fft=n_fft)

    print(f"Feature matrix shape for Phase 1: {X_phase1.shape}")
    print(f"Feature matrix shape for Phase 2: {X_phase2.shape}")

    # --- STEP 5: TRAIN ON PHASE 1, TEST ON PHASE 2 ---
    print("\n--- STEP 5: Training on Phase 1, Testing on Phase 2 ---")

    # NOTE(review): train and test rows come from the same trials (adjacent
    # stretches of one recording), so the score may partly reflect per-trial
    # signal properties rather than valence — interpret with care.
    X_train = X_phase1
    X_test = X_phase2

    clf = make_pipeline(StandardScaler(), SVC(kernel='linear'))
    clf.fit(X_train, y_labels)

    accuracy = clf.score(X_test, y_labels)
    # With unequal class sizes, always predicting the larger class already
    # beats 50%; report that baseline alongside the nominal chance level.
    majority_baseline = class_counts.max() / len(y_labels)

    print("\n================================================")
    print("       TEMPORAL GENERALIZATION RESULTS")
    print("================================================")
    print(f"Model trained on 'Formation' Phase (Phase 1), tested on 'Persistent' Phase (Phase 2).")
    print(f"Accuracy: {accuracy:.2%}")
    print("This result shows how well the 'formation' signature predicts the 'persistent' state.")
    print("Chance level for Positive vs. Negative is ~50%.")
    print(f"Majority-class baseline for these {len(y_labels)} trials: {majority_baseline:.2%}")

    # Stratified folds need at least n_splits members in the smallest class.
    n_splits = min(3, int(class_counts.min()))
    if n_splits >= 2:
        cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
        scores_phase2 = cross_val_score(clf, X_test, y_labels, cv=cv, scoring='accuracy')
        print(f"\nFor reference, internal classification accuracy of Phase 2 data: {np.mean(scores_phase2):.2%}")
    else:
        print("\nSkipping the Phase 2 reference cross-validation: a class has fewer than 2 trials.")

except FileNotFoundError:
    print(f"\n❌ FILE NOT FOUND. Could not find EEG data at the specified path.")
except Exception as e:
    print(f"\n❌ AN ERROR OCCURRED: {e}")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/7.4 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/7.4 MB[0m [31m10.3 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/7.4 MB[0m [31m43.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m7.4/7.4 MB[0m [31m84.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m67.5 MB/s[0m eta [36m0:00:00[0m
[?25hLoading EEG data from: ./ds003004/sub-01/eeg/sub-01_task-ImaginedEmotion_eeg.set
Loading events from: ./ds003004/sub-01/eeg/sub-01_task-ImaginedEmotion_events.tsv
Reading /content/ds003004/sub-01/eeg/sub-01_task-ImaginedEmotion_eeg.fdt
Reading 0 ... 1154303  =      0.000 ...  4508.996 secs...
Filtering raw data in 1 contiguous segment
Setting up high-pass filter at 1 

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Done 202 tasks      | elapsed:   13.0s
[Parallel(n_jobs=-1)]: Done 224 out of 224 | elapsed:   14.7s finished



✅ Data and events loaded successfully.
--- Parsing events to define emotion trials and phases ---
Not setting metadata
1 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 1 events and 16414 original time points ...
0 bad epochs dropped
Not setting metadata
1 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 1 events and 32669 original time points ...
0 bad epochs dropped
Not setting metadata
1 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 1 events and 20140 original time points ...
0 bad epochs dropped
Not setting metadata
1 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 1 events and 48603 original time points ...
0 bad epochs dropped
Not setting metadata
1 matching events found
No baseline correction applied
0 project

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


Effective window size : 1.000 (s)
Effective window size : 1.000 (s)
Effective window size : 1.000 (s)


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


Effective window size : 1.000 (s)
Effective window size : 1.000 (s)
Effective window size : 1.000 (s)


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


Effective window size : 1.000 (s)
Effective window size : 1.000 (s)


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


Effective window size : 1.000 (s)
Effective window size : 1.000 (s)
Effective window size : 1.000 (s)


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


Effective window size : 1.000 (s)
Effective window size : 1.000 (s)


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


Effective window size : 1.000 (s)
Effective window size : 1.000 (s)


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


Effective window size : 1.000 (s)
Effective window size : 1.000 (s)


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


Effective window size : 1.000 (s)
Effective window size : 1.000 (s)
Effective window size : 1.000 (s)


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


Effective window size : 1.000 (s)
Effective window size : 1.000 (s)
Effective window size : 1.000 (s)


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


Effective window size : 1.000 (s)
Feature matrix shape for Phase 1: (14, 10080)
Feature matrix shape for Phase 2: (14, 10080)

--- STEP 5: Training on Phase 1, Testing on Phase 2 ---

       TEMPORAL GENERALIZATION RESULTS
Model trained on 'Formation' Phase (Phase 1), tested on 'Persistent' Phase (Phase 2).
Accuracy: 78.57%
This result shows how well the 'formation' signature predicts the 'persistent' state.
Chance level for Positive vs. Negative is ~50%.

For reference, internal classification accuracy of Phase 2 data: 50.00%


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished
