In [None]:
import os
import pandas as pd

# Input locations: one directory of recordings per cohort.
norm_dir = "./norm_data"
schizo_dir = "./schizo_data"

# Region labels, in the order their sample chunks are read from each file
# (the i-th chunk of a file is assigned to the i-th name in this list).
regions = [
    "F7", "F3", "F4", "F8",
    "T3", "C3", "Cz", "C4",
    "T4", "T5", "P3", "Pz",
    "P4", "T6", "O1", "O2",
]

In [None]:
# Process a single file
def process_file(file_path, samples_per_region=7680, region_names=None):
    """Read one recording file and split it into per-region sample lists.

    The file is read as text with one numeric value per line. Lines are
    consumed in consecutive chunks of ``samples_per_region`` lines; the i-th
    chunk is assigned to the i-th region name. Lines beyond the last chunk
    are ignored.

    Args:
        file_path: Path of the text file to read.
        samples_per_region: Number of consecutive lines that belong to each
            region. Defaults to 7680, the value that used to be hard-coded.
        region_names: Ordered region labels. When None, the module-level
            ``regions`` list is used.

    Returns:
        dict mapping each region name to a list of floats.

    Raises:
        ValueError: If the file has fewer than
            ``len(region_names) * samples_per_region`` lines, or if a line
            in a chunk cannot be converted to float.
    """
    names = regions if region_names is None else region_names
    with open(file_path, "r") as file:
        lines = file.readlines()

    # Fail loudly on a truncated file: slicing past the end of ``lines``
    # would otherwise produce short or empty lists without any error.
    expected_lines = len(names) * samples_per_region
    if len(lines) < expected_lines:
        raise ValueError(
            "{}: expected at least {} lines ({} regions x {} samples), found {}".format(
                file_path, expected_lines, len(names), samples_per_region, len(lines)
            )
        )

    # Enumerate over the regions, taking one contiguous chunk for each
    region_data = {}
    for i, region in enumerate(names):
        start = i * samples_per_region
        end = start + samples_per_region
        region_data[region] = [float(value.strip()) for value in lines[start:end]]
    return region_data

# Now process all files in the directory with this function
def process_directory(directory, schizo_label):
    """Build one record per ``.eea`` file found directly in ``directory``.

    Each record is the dict returned by ``process_file`` for that file,
    extended with two extra keys: ``"id"`` (the file name without its
    ``.eea`` extension) and ``"schizo"`` (the ``schizo_label`` argument).

    Args:
        directory: Directory to scan; sub-directories are not traversed.
        schizo_label: Value stored under ``"schizo"`` in every record.

    Returns:
        list of dicts, ordered by file name.
    """
    suffix = ".eea"
    data = []
    # os.listdir returns names in arbitrary order; sorting keeps the row
    # order reproducible from run to run.
    for file_name in sorted(os.listdir(directory)):
        if file_name.endswith(suffix):
            file_path = os.path.join(directory, file_name)
            region_data = process_file(file_path)
            # Strip only the trailing extension; str.replace would also
            # remove ".eea" if it occurred elsewhere in the name.
            region_data["id"] = file_name[:-len(suffix)]
            region_data["schizo"] = schizo_label  # Cohort label supplied by the caller
            data.append(region_data)
    return data


In [None]:

# Load both cohorts: records from norm_dir are labelled 0, records from
# schizo_dir are labelled 1.
norm_data = process_directory(norm_dir, 0)
schizo_data = process_directory(schizo_dir, 1)
combined_data = [*norm_data, *schizo_data]

# One row per file; put the identifying columns first, then one column per
# region in the order given by `regions`.
df = pd.DataFrame(combined_data)[["id", "schizo", *regions]]

print(df.head())
# NOTE: every region cell holds a Python list, so the CSV stores each one as
# the list's string representation.
df.to_csv("eeg_data.csv", index=False)


       id  schizo                                                 F7  \
0  s178w1       0  [-350.69, -250.14, -131.2, -50.27, 169.21, 329...   
1  S154W1       0  [148.97, 109.99, 48.73, 29.24, 69.61, 109.99, ...   
2  s157w1       0  [-131.8, -30.56, -131.8, -210.11, -431.69, -47...   
3   S72W1       0  [48.24, -11.58, 88.76, 148.58, 109.99, 48.24, ...   
4   s94w1       0  [-310.93, -330.86, -170.08, -90.36, -70.43, -7...   

                                                  F3  \
0  [-90.74, 0.0, 159.4, 239.11, 438.97, 539.52, 6...   
1  [488.69, 349.46, 119.74, 89.11, 148.97, 189.35...   
2  [-450.79, -370.56, -170.0, -150.9, -311.35, -4...   
3  [179.46, 48.24, 88.76, 208.4, 69.47, -61.75, -...   
4  [-591.3, -651.1, -390.66, -190.01, -10.63, -30...   

                                                  F4  \
0  [259.95, 318.81, 539.52, 679.31, 898.79, 919.6...   
1  [419.08, 318.83, 79.36, -20.88, -20.88, 38.98,...   
2  [-311.35, -340.0, -261.69, -160.45, -70.67, -1...   
3  [21

In [None]:
def process_file(file_path, schizo_label, samples_per_region=7680, region_names=None):
    """Read one recording file and return one row per region (long format).

    The file is read as text with one numeric value per line. Lines are
    consumed in consecutive chunks of ``samples_per_region`` lines; the i-th
    chunk is assigned to the i-th region name. Lines beyond the last chunk
    are ignored.

    Args:
        file_path: Path of the text file to read.
        schizo_label: Value stored under ``"schizo"`` in every row.
        samples_per_region: Number of consecutive lines that belong to each
            region. Defaults to 7680, the value that used to be hard-coded.
        region_names: Ordered region labels. When None, the module-level
            ``regions`` list is used.

    Returns:
        list of dicts with keys ``"id"`` (file name without a trailing
        ``.eea``), ``"schizo"``, ``"region"`` and ``"eeg_data"`` (list of
        floats), one dict per region and in region order.

    Raises:
        ValueError: If the file has fewer than
            ``len(region_names) * samples_per_region`` lines, or if a line
            in a chunk cannot be converted to float.
    """
    names = regions if region_names is None else region_names
    with open(file_path, "r") as file:
        lines = file.readlines()

    # Fail loudly on a truncated file: slicing past the end of ``lines``
    # would otherwise produce short or empty lists without any error.
    expected_lines = len(names) * samples_per_region
    if len(lines) < expected_lines:
        raise ValueError(
            "{}: expected at least {} lines ({} regions x {} samples), found {}".format(
                file_path, expected_lines, len(names), samples_per_region, len(lines)
            )
        )

    # Strip only a trailing ".eea"; str.replace would also remove ".eea"
    # occurring elsewhere in the file name.
    suffix = ".eea"
    file_id = os.path.basename(file_path)
    if file_id.endswith(suffix):
        file_id = file_id[:-len(suffix)]

    rows = []
    for i, region in enumerate(names):
        start = i * samples_per_region
        end = start + samples_per_region
        eeg_data = [float(value.strip()) for value in lines[start:end]]
        # When adding data here, we add a new row per region
        rows.append({
            "id": file_id,
            "schizo": schizo_label,
            "region": region,
            "eeg_data": eeg_data
        })
    return rows

def process_directory(directory, schizo_label):
    """Collect the per-region rows of every ``.eea`` file in ``directory``.

    Each matching file is passed to ``process_file`` together with
    ``schizo_label``, and the rows it returns are appended to one flat list.

    Args:
        directory: Directory to scan; sub-directories are not traversed.
        schizo_label: Label forwarded to ``process_file`` for every file.

    Returns:
        list of row dicts, grouped by file and ordered by file name.
    """
    data = []
    # os.listdir returns names in arbitrary order; sorting keeps the row
    # order reproducible from run to run.
    for file_name in sorted(os.listdir(directory)):
        if file_name.endswith(".eea"):
            file_path = os.path.join(directory, file_name)
            data.extend(process_file(file_path, schizo_label))
    return data


# Rebuild the table in long format (one row per file and region): records
# from norm_dir are labelled 0, records from schizo_dir are labelled 1.
norm_data = process_directory(norm_dir, 0)
schizo_data = process_directory(schizo_dir, 1)
combined_data = [*norm_data, *schizo_data]
df = pd.DataFrame(combined_data)

print(df.head())
# NOTE: each eeg_data cell holds a Python list, so the CSV stores it as the
# list's string representation.
df.to_csv("eeg_data_expanded.csv", index=False)


       id  schizo region                                           eeg_data
0  s178w1       0     F7  [-350.69, -250.14, -131.2, -50.27, 169.21, 329...
1  s178w1       0     F3  [-90.74, 0.0, 159.4, 239.11, 438.97, 539.52, 6...
2  s178w1       0     F4  [259.95, 318.81, 539.52, 679.31, 898.79, 919.6...
3  s178w1       0     F8  [39.24, 39.24, 199.87, 359.27, 698.93, 759.01,...
4  s178w1       0     T3  [-350.69, -210.9, -71.12, -30.65, 79.7, 179.02...
