## Quick script to check the missing channels per ROI for each subject 

In [None]:
import re
import pandas as pd
from pathlib import Path

# File locations used by main(); adjust these to your own setup if needed.
# INPUT_CSV is read by main(), which expects a "bad_ch_names" column and uses
# "participant_id" when present.
INPUT_CSV  = Path("/Users/lucas.assen/Desktop/Master Thesis/bad_channel_summary.csv")     
# Output: wide table with one row per participant and one bad-channel count per region.
COUNTS_WIDE_CSV = Path("/Users/lucas.assen/Desktop/Master Thesis/bad_channels_counts_wide.csv")
# Output: long table with one row per participant and region.
LONG_CSV        = Path("/Users/lucas.assen/Desktop/Master Thesis/bad_channels_long.csv")

# Defining the relevant regions: region label -> EEG channel names belonging to it.
# Odd-numbered (left) regions F1/P1/T1 are mirrored by the even-numbered (right)
# regions F2/P2/T2. The right-hemisphere counterparts of FT7 and TP7 are FT8 and
# TP8 in the 10-10 naming scheme ("FC8" and "CP8" are not 10-10 labels), so
# those names are used here; otherwise bad FT8/TP8 channels are never counted.
# NOTE(review): midline labels use mixed casing ("Fz"/"Cz"/"Pz" vs
# "FCZ"/"CPZ"/"POZ"/"OZ") - confirm they match the spelling in the input CSV.
REGIONS = {"F1": ["AF7","AF3","F7","F5","F3","F1"],
           "F2": ["AF4","AF8","F4","F6","F2","F8"],
           "C" : ["Fz","FC1","FCZ","FC2","C1","Cz","C2","CP1","CPZ","CP2","P1","Pz","P2","POZ"],
           "O" : ["PO7","PO8","PO3","PO4","O1","OZ","O2"],
           "P1": ["TP7","CP5","CP3","P7","P5","P3"],
           "P2": ["CP4","CP6","TP8","P4","P6","P8"],
           "T1": ["FT7","FC5","FC3","T7","C5","C3"],
           "T2": ["FC4","FC6","FT8","C4","C6","T8"],}

# Due to one of the csv file having a semocolon as separator we need a extra helper function to change it.
def parse_bad_list(cell: str | float) -> list[str]:
    if pd.isna(cell):
        return []
    cell = str(cell).strip()
    return [] if not cell else re.split(r"\s*,\s*", cell)

def main() -> None:
    """Summarise the bad channels per region for every row of ``INPUT_CSV``.

    Reads ``INPUT_CSV``, parses its ``bad_ch_names`` column with
    ``parse_bad_list`` and writes two files:

    * ``LONG_CSV``: one row per participant and region, holding the number of
      bad channels (``n_bad``) and their names (``bad_channels``).
    * ``COUNTS_WIDE_CSV``: one row per participant with one ``n_bad`` column
      per region.

    Channel names are compared case-insensitively, so a name such as ``FCz``
    in the CSV matches ``FCZ`` in ``REGIONS``. The spelling found in the CSV
    is the one written to the output.

    Raises:
        KeyError: If the input has no ``bad_ch_names`` column.
        ValueError: Raised by ``DataFrame.pivot`` when the same
            ``participant_id`` occurs in more than one input row.
    """
    df = pd.read_csv(INPUT_CSV)
    df["bad_list"] = df["bad_ch_names"].apply(parse_bad_list)

    # Case-folded lookup sets, built once, so that matching does not depend on
    # whether a label is written "Fz"/"FZ" or "FCz"/"FCZ".
    region_keys = {region: {ch.casefold() for ch in ch_list}
                   for region, ch_list in REGIONS.items()}

    # First we create a long format file of the number of bad channels and which channels these are for each region per participant
    records = []
    for idx, row in df.iterrows():
        participant = row.get("participant_id", idx)  # fallback to row index
        # De-duplicate by case-folded name, keeping the first spelling seen.
        bad_by_key: dict[str, str] = {}
        for ch in row["bad_list"]:
            bad_by_key.setdefault(ch.casefold(), ch)
        for region, keys in region_keys.items():
            bad_in_region = sorted(ch for key, ch in bad_by_key.items() if key in keys)
            records.append({"participant_id": participant,
                            "region": region,
                            "n_bad": len(bad_in_region),
                            "bad_channels": ", ".join(bad_in_region)})
    long_df = pd.DataFrame.from_records(records)
    long_df.to_csv(LONG_CSV, index=False)

    # A wide version is also created which makes it easier to see the number of channels per region per participant
    # NOTE: pivot() needs every (participant_id, region) pair to be unique.
    counts_wide = (long_df.pivot(index="participant_id", columns="region", values="n_bad")
               .fillna(0)
               .astype(int)
               .reset_index()
               .rename_axis(columns=None))
    counts_wide.to_csv(COUNTS_WIDE_CSV, index=False)
    
# Run the analysis only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

✓ Wrote long format to /Users/lucas.assen/Desktop/Master Thesis/bad_channels_long.csv
✓ Wrote counts-wide format to /Users/lucas.assen/Desktop/Master Thesis/bad_channels_counts_wide.csv

--- counts (wide) head() ---
              participant_id  C  F1  F2  O  P1  P2  T1  T2
0   10_ICA_cleaned_processed  0   0   0  0   0   0   0   0
1  10c_ICA_cleaned_processed  0   0   1  0   0   0   2   0
2   11_ICA_cleaned_processed  0   0   0  0   1   0   3   0
3  11c_ICA_cleaned_processed  0   0   0  0   0   0   0   0
4   12_ICA_cleaned_processed  0   0   1  1   0   0   0   0

--- long format head() ---
             participant_id region  n_bad bad_channels
0  10_ICA_cleaned_processed     F1      0             
1  10_ICA_cleaned_processed     F2      0             
2  10_ICA_cleaned_processed      C      0             
3  10_ICA_cleaned_processed      O      0             
4  10_ICA_cleaned_processed     P1      0             
