In [44]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import umap
import umap.plot
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml

In [45]:
# Data
# Read both Stroop datasets from disk first, then wrap each result in its own
# DataFrame object. `trad_*` is the traditional task, `embodied_*` the embodied one.

trad_data = pd.read_csv("data/Braver_StroopData.csv")
embodied_data = pd.read_csv("data/eeg_n450.csv")

trad_df = pd.DataFrame(trad_data)
embodied_df = pd.DataFrame(embodied_data)

In [46]:
# Display the column names of the traditional Stroop frame; the cells that
# follow select the EEG columns from this list by name prefix.
trad_df.columns

Index(['Unnamed: 0', 'trial', 'rule', 'pc', 'congruency', 'stimset', 'subj',
       'session', 'run', 'block',
       ...
       'PreResp.CP6', 'PreResp.C4', 'PreResp.FC2', 'PreResp.FC6', 'PreResp.F4',
       'PreResp.F8', 'PreResp.Fp2', 'PreResp.VEOG', 'PreResp.HEOGR',
       'PreResp.Cz'],
      dtype='object', length=109)

In [47]:
# Traditional task: pick every column whose name starts with "N450." as an EEG
# feature, and keep a list of the trial-level metadata column names alongside.
trad_column_names = trad_df.columns
trad_eeg_columns = trad_column_names[trad_column_names.str.startswith("N450.")].tolist()
trad_metadata_columns = ['trial', 'rule', 'subj', 'session', 'run', 'pc', 'stimset']

# Per-trial rule label. NOTE(review): the plotting cell further down compares
# these labels against 'cn', so the stored values are presumably short codes
# rather than the words "color"/"word" -- confirm against the CSV.
trad_rule = trad_df['rule'].values

# EEG matrix with one row per trial and one column per selected "N450." column.
trad_eeg_data = trad_df[trad_eeg_columns].values

In [48]:
# Embodied task: EEG features are the columns whose name starts with "EEG";
# the metadata column names are listed separately.
embodied_column_names = embodied_df.columns
embodied_cols = embodied_column_names[embodied_column_names.str.startswith("EEG")].tolist()
embodied_metadata_cols = ["trial_start", "rule"]

# Per-trial rule label and the matching EEG matrix (one row per trial).
embodied_rule = embodied_df["rule"].values
embodied_eeg_data = embodied_df[embodied_cols].values

In [49]:
# Standardizing traditional Stroop task
# Fit the scaler on the traditional EEG matrix, then apply it to that same
# matrix so every column is standardized. The NaN check that follows this cell
# reports NaNs in the scaled output, i.e. missing values are not removed here.

scaler = StandardScaler().fit(trad_eeg_data)

trad_eeg_data_scaled = scaler.transform(trad_eeg_data)

In [50]:
# Count the NaN entries (over all rows and columns) in the scaled traditional data.
trad_nan_mask = np.isnan(trad_eeg_data_scaled)
nan_counts = np.sum(trad_nan_mask)
print(f"Number of NaNs in EEG data: {nan_counts}")

Number of NaNs in EEG data: 20736


In [51]:
# A trial is kept only when every one of its scaled EEG values is non-NaN.
valid_rows = (~np.isnan(trad_eeg_data_scaled)).all(axis=1)

# Apply the same boolean mask to the labels and to the EEG matrix so the two
# stay aligned row-for-row.
clean_rule = trad_rule[valid_rows]
clean_trad_eeg = trad_eeg_data_scaled[valid_rows]

In [52]:
# Standardizing Embodied Stroop task
# NOTE(review): this refits the existing `scaler` object on the embodied data,
# so from here on `scaler` holds the embodied statistics and no longer those of
# the traditional task.
embodied_scaled = scaler.fit(embodied_eeg_data).transform(embodied_eeg_data)

In [53]:
# Count the NaN entries (over all rows and columns) in the scaled embodied data.
embodied_nan_mask = np.isnan(embodied_scaled)
nan_counts = np.sum(embodied_nan_mask)
print(f"Number of NaNs in EEG data: {nan_counts}")

Number of NaNs in EEG data: 0


## PCA

### Three components

In [54]:
# PCA model that keeps three components; the fit_transform cells below call it
# on the traditional data and later on the embodied data.
pca = PCA(n_components=3)

In [55]:
# Fit the PCA on the NaN-free, scaled traditional EEG and project every
# retained trial onto the three components (one row per trial, three columns).
trad_pca = pca.fit_transform(clean_trad_eeg)

In [None]:
# 3D scatter of the traditional-task trials in PCA space, coloured by whether
# the trial's rule label equals 'cn' (red) or not (blue).
is_cn = np.array([r == 'cn' for r in clean_rule], dtype=bool)

# Per-trial colour list, kept under its original name in case other cells use it.
colors = ['red' if flag else 'blue' for flag in is_cn]

fig, ax = plt.subplots(subplot_kw={'projection': '3d'})

# Draw one scatter per group so each gets its own legend entry; without a
# legend the red/blue encoding cannot be read off the figure.
for mask, colour, label in (
    (is_cn, 'red', "rule == 'cn'"),
    (~is_cn, 'blue', "rule != 'cn'"),
):
    if not mask.any():
        # Skip empty groups so they neither draw nor appear in the legend.
        continue
    ax.scatter(
        trad_pca[mask, 0], trad_pca[mask, 1], trad_pca[mask, 2],
        color=colour, alpha=0.6, label=label,
    )

ax.set_title("PCA of EEG (Color vs Word Rule)")
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
ax.set_zlabel("PC3")
ax.legend(title="Rule")
plt.show()

In [58]:
# Project the scaled embodied EEG onto three principal components.
# NOTE(review): this refits the same `pca` object used for the traditional
# data, so its fitted attributes now describe the embodied data only, and
# `trad_pca` / `embodied_pca` come from two separately fitted decompositions.
embodied_pca = pca.fit_transform(embodied_scaled)