# Starling Task demographic info

By: Niloufar Shahdoust

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('svg')
matplotlib.rcParams['svg.fonttype'] = 'none'
matplotlib.rcParams['font.weight'] = 'bold'
from matplotlib.patches import Patch
import os
import seaborn as sns 
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import FixedLocator
from scipy.stats import permutation_test
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from scipy.stats import pearsonr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from scipy.optimize import curve_fit
from statsmodels.stats.multitest import multipletests
import os
import matplotlib.ticker as mticker
from pathlib import Path
from scipy.stats import mannwhitneyu


In [2]:
# Base directory of the Starling task data. Set the STARLING_DATA_DIR
# environment variable to run this notebook on another machine; when it is
# unset, the original local path is used, so existing runs are unaffected.
starling_data_dir = Path(
    os.environ.get(
        "STARLING_DATA_DIR",
        "/Users/niloufarshahdoust/Documents/PhD/0. NeuroSmith/Tasks/5_starling",
    )
)

# Group-level roots; load_demographics() treats every sub-folder of a root
# as one participant.
epileptic_root = starling_data_dir / "0_all_data_epileptic"
healthy_root = starling_data_dir / "0_all_data_healthy"

def load_demographics(root_dir, file_ext):
    """
    Load all demographic files from participant subfolders.

    Every sub-directory of ``root_dir`` is treated as one participant. Inside
    each one, every file whose name starts with ``demographic`` and ends with
    ``file_ext`` is read into its own DataFrame.

    Parameters
    ----------
    root_dir : Path or str
        Root directory containing participant folders
    file_ext : str
        File extension ('.csv' or '.xlsx'). Any other value yields an
        empty list.

    Returns
    -------
    list[pd.DataFrame]
        List of demographic DataFrames, one per file, ordered by participant
        folder path and then by file path (plain lexicographic sort, so
        "p10" comes before "p2"). Each frame gets two extra columns:
        ``participant_id`` (the folder name) and ``source_file`` (the file
        path as a string).

    Raises
    ------
    FileNotFoundError
        If ``root_dir`` does not exist (raised by ``Path.iterdir``).
    """
    readers = {".csv": pd.read_csv, ".xlsx": pd.read_excel}
    read_file = readers.get(file_ext)

    root_dir = Path(root_dir)

    # iterdir() yields entries in arbitrary, filesystem-dependent order.
    # Sorting makes the returned list reproducible, which matters because
    # callers index it by position.
    participant_dirs = sorted(p for p in root_dir.iterdir() if p.is_dir())

    # Unsupported extension: nothing can be read, so return an empty list
    # (the directory listing above still validates that root_dir exists).
    if read_file is None:
        return []

    dfs = []
    for participant_dir in participant_dirs:
        # Find files starting with 'demographic'; sorted for the same
        # reproducibility reason as the folders.
        for file in sorted(participant_dir.glob(f"demographic*{file_ext}")):
            df = read_file(file)

            # Add useful metadata
            df["participant_id"] = participant_dir.name
            df["source_file"] = str(file)

            dfs.append(df)

    return dfs


# Read the per-participant demographic files for both groups.
# Epileptic files are looked up as CSV, healthy files as Excel workbooks.
epileptic_dfs = load_demographics(epileptic_root, ".csv")
healthy_dfs = load_demographics(healthy_root, ".xlsx")

# Sanity check: report how many files were found for each group.
for group_label, group_dfs in (("epileptic", epileptic_dfs), ("healthy", healthy_dfs)):
    print(f"Loaded {len(group_dfs)} {group_label} demographic files")


Loaded 10 epileptic demographic files
Loaded 37 healthy demographic files


In [3]:
# Spot check: display the second DataFrame (index 1) of the epileptic list.
# Which participant this is depends on the order in which load_demographics
# returned the frames.
epileptic_dfs[1]

Unnamed: 0,Gender,Age,Ethnicity,Race,participant_id,source_file
0,Male,45-54,Not Hispanic or Latino,White,202506,/Users/niloufarshahdoust/Documents/PhD/0. Neur...


In [4]:
import pandas as pd
from contextlib import redirect_stdout
from pathlib import Path

def _stack_group(frames):
    """Concatenate one group's frames into a single table and trim whitespace from its column labels."""
    stacked = pd.concat(frames, ignore_index=True)
    stacked.columns = stacked.columns.str.strip()
    return stacked


# One combined table per group, with whitespace-trimmed column names
df_epileptic = _stack_group(epileptic_dfs)
df_healthy = _stack_group(healthy_dfs)

def count_unique_inputs(df, group_name, columns=("Age", "Gender")):
    """
    Print the value counts of selected demographic columns for one group.

    Parameters
    ----------
    df : pd.DataFrame
        Demographic table for the group.
    group_name : str
        Group label; printed upper-cased as a section header.
    columns : iterable of str, optional
        Columns to summarise, in print order. Defaults to ("Age", "Gender"),
        which reproduces the original Age-then-Gender report.

    Returns
    -------
    None
        Everything is written to stdout via ``print``, so the caller can
        capture it with ``contextlib.redirect_stdout``.
    """
    print(f"\n--- {group_name.upper()} ---")

    for column in columns:
        if column in df.columns:
            print(f"\n{column} (unique value counts):")
            # Missing entries are dropped before counting.
            print(df[column].dropna().value_counts())
        else:
            # Report what is available so a misnamed column is easy to spot.
            print(f"\n{column} column not found.")
            print("Available columns:", df.columns.tolist())


# ---- SAVE OUTPUT TO FILE ----
# Relative path: the folder is created under the current working directory.
output_dir = Path("31_starling_demographic")
output_dir.mkdir(parents=True, exist_ok=True)

output_path = output_dir / "demographic_unique_counts.txt"

# count_unique_inputs() only prints, so stdout is redirected into the file
# while both group summaries are produced. The encoding is set explicitly so
# the file does not depend on the platform's locale default.
with open(output_path, "w", encoding="utf-8") as f, redirect_stdout(f):
    count_unique_inputs(df_epileptic, "epileptic")
    count_unique_inputs(df_healthy, "healthy")

# Printed after the redirect has ended, so this goes to the notebook output.
print(f"Saved demographic summary to: {output_path.resolve()}")


Saved demographic summary to: /Users/niloufarshahdoust/Documents/PhD/0. NeuroSmith/Tasks/5_starling/figs/31_starling_demographic/demographic_unique_counts.txt
