In [1]:
from google.colab import drive
# Attach Google Drive to the Colab filesystem; the later cells read and
# write files underneath this mount point.
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [36]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler,OneHotEncoder

In [37]:
# CSV file with the subject information, stored on the mounted Drive.
csv_path = "/content/drive/MyDrive/SIENA_EEG/subject_info.csv"

# Read the table unchanged and show the header names exactly as parsed,
# so any stray whitespace in them is visible before cleaning.
df = pd.read_csv(filepath_or_buffer=csv_path)
print("Original columns:", list(df.columns))

Original columns: ['patient_id', ' age_years', ' gender', ' seizure', ' localization', ' lateralization', ' eeg_channel', ' number_seizures', ' rec_time_minutes']


In [38]:
# Trim leading/trailing whitespace from every header so columns can be
# selected by their plain names in the following cells.
df.columns = df.columns.map(str.strip)
print("Cleaned columns:", list(df.columns))


Cleaned columns: ['patient_id', 'age_years', 'gender', 'seizure', 'localization', 'lateralization', 'eeg_channel', 'number_seizures', 'rec_time_minutes']


In [39]:
# Columns that are standardised as continuous features.
numerical_cols = [
    'age_years',
    'eeg_channel',
    'number_seizures',
    'rec_time_minutes',
]

# Columns that are one-hot encoded as categorical features.
categorical_cols = [
    'gender',
    'seizure',
    'localization',
    'lateralization',
]


In [40]:
# Standardise the numerical columns: learn the per-column statistics first,
# then apply them to the same rows. `scaler` stays available afterwards as
# the fitted transformer.
scaler = StandardScaler()
scaler.fit(df[numerical_cols])
num_scaled = scaler.transform(df[numerical_cols])


In [42]:
# One-hot encode the categorical columns. `sparse_output=False` makes the
# encoder return a dense array, which is what the concatenation step needs.
encoder = OneHotEncoder(sparse_output=False).fit(df[categorical_cols])
cat_encoded = encoder.transform(df[categorical_cols])

In [43]:
# Place the scaled numerical features and the one-hot categorical features
# side by side: one row per table row, feature columns joined horizontally.
demographic_embeddings = np.hstack((num_scaled, cat_encoded))
print("Embedding shape:", demographic_embeddings.shape)

Embedding shape: (14, 14)


In [46]:
# Persist the embedding matrix to Drive so it outlives the Colab session.
np.save(
    file='/content/drive/MyDrive/SIENA_EEG/demographic_embeddings.npy',
    arr=demographic_embeddings,
)

In [47]:
import numpy as np

# Load the embeddings back from the exact file written by the save cell.
# A bare relative name ('demographic_embeddings.npy') would be resolved
# against the kernel's working directory rather than the Drive folder, so on
# a fresh kernel it would either fail or pick up a stale local copy.
demographic_embeddings = np.load(
    '/content/drive/MyDrive/SIENA_EEG/demographic_embeddings.npy'
)

# See the shape
print("Shape:", demographic_embeddings.shape)

# See the first few rows
print("First 5 embeddings:\n", demographic_embeddings[:5])

# Optional: see the full array (careful if it's huge)
# print(demographic_embeddings)


Shape: (14, 14)
First 5 embeddings:
 [[ 0.84008131  0.2773501   0.73886645 -1.03066422  0.          1.
   0.          1.          0.          0.          1.          0.
   0.          1.        ]
 [ 0.18262637  0.2773501  -0.61036794  0.7569071   0.          1.
   0.          1.          0.          0.          1.          0.
   1.          0.        ]
 [ 0.76703076  0.2773501  -0.61036794  0.59014512  0.          1.
   0.          1.          0.          0.          1.          0.
   0.          1.        ]
 [ 0.54787912  0.2773501  -0.16062314 -0.55963479  1.          0.
   0.          1.          0.          0.          1.          0.
   1.          0.        ]
 [-0.54787912  0.2773501   0.73886645  0.50237567  0.          1.
   0.          1.          0.          0.          1.          0.
   1.          0.        ]]
