Initialize data, load packages and needed modules

In [1]:
# Seeds available for repeated runs; the data-split cell picks random_seeds[run].
random_seeds = [42, 100, 0, 10, 12, 20, 50, 9, 30, 51]
# Index of the active run: selects the seed above and names the NM output folder ('Run_<run>').
run = 0

In [2]:
# Cluster account name; interpolated into the home-directory paths below.
username = 'meganorm-yverduyn'

# One entry per EEG dataset. Each entry gives the data root ('base_dir'),
# the task label ('task') and the recording-file suffix ('ending').
datasets = {
    'TDBrain': {
        'base_dir': '/project/meganorm/Data/EEG_TDBrain/EEG/',  # Final
        # 'base_dir': '/home/meganorm-yverduyn/Dev/BIDS_TDBrain',  # To test
        'task': 'task-restEC',
        'ending': 'eeg.vhdr',
    },

    'MIPDB': {
        'base_dir': '/project/meganorm/Data/EEG_MIPDB/EEG_BIDS/',  # Final
        # 'base_dir': '/home/meganorm-yverduyn/Dev/MIPDB/EEG_BIDS',  # To test
        'task': 'task-eyesclosed',
        'ending': 'eeg.set',
    },

    'CMI': {
        'base_dir': '/project/meganorm/Data/EEG_CMI/EEG_BIDS',  # Final
        # 'base_dir': '/home/meganorm-yverduyn/Dev/BIDS_CMI',  # To test
        'task': 'task-eyesclosed',
        'ending': 'eeg.set',
    },
}

# Root of the MEGaNorm checkout; the import cell changes into this directory.
package_path = f'/home/{username}/Code/MEGaNorm/'

In [3]:
import os
os.chdir(package_path)
from utils.parallel import submit_jobs, check_jobs_status, collect_results
from datasets.CMI import load_covariates_CMI
from datasets.MIPDB import mne_bids_MIPDB
from utils.nm import hbr_data_split, estimate_centiles
from plots.plots import plot_nm_range_site2, plot_comparison, plot_neurooscillochart, plot_age_dist2, plot_growthcharts, plot_quantile_gauge, box_plot_auc
from utils.nm import model_quantile_evaluation, calculate_oscilochart, cal_stats_for_gauge, mace
from utils.IO import merge_datasets_with_regex, merge_fidp_demo, merge_datasets_with_glob
from utils.IO import separate_eyes_open_close_eeglab
from datasets.mne_bids_conversion import make_demo_file_bids
import pandas as pd
import json
from pcntoolkit.normative_parallel import execute_nm, rerun_nm, collect_nm
import warnings
import pickle  
import numpy as np
from pcntoolkit.util.utils import z_to_abnormal_p, anomaly_detection_auc
from scipy.stats import false_discovery_control
warnings.filterwarnings("ignore")

Create configuration file

In [4]:
def make_config(project, path=None):

    # preprocess configurations =================================================
    # downsample data
    config = dict()

    # You could also set layout to None to have high 
    # choices: all, lobe, None
    config["which_layout"] = "all"

    # which sensor type should be used
    # choices: meg, mag, grad, eeg, opm
    config["which_sensor"] = "eeg"
    # config['fs'] = 500

    # ICA configuration
    config['ica_n_component'] = 25
    config['ica_max_iter'] = 800
    config['ica_method'] = "infomax" 

    # lower and upper cutoff frequencies in a bandpass filter
    config['cutoffFreqLow'] = 1
    config['cutoffFreqHigh'] = 45

    config["resampling_rate"] = 1000
    config["digital_filter"] = True
    config["notch_filter"] = False

    config["apply_ica"] = True

    config["auto_ica_corr_thr"] = 0.8

    config["rereference_method"]= "average"
    
    # variance threshold across time
    config["mag_var_threshold"] = 4e-12
    config["grad_var_threshold"] = 4000e-13
    config["eeg_var_threshold"] = 40e-6  
    # flatness threshold across time
    config["mag_flat_threshold"] = 10e-15
    config["grad_flat_threshold"] = 10e-15
    config["eeg_flat_threshold"] = 10e-6 

    # segmentation ==============================================
    #start time of the raw data to use in seconds, this is to avoid possible eye blinks in close-eyed resting state. 
    config['segments_tmin'] = 5
    # end time of the raw data to use in seconds, this is to avoid possible eye blinks in close-eyed resting state.
    config['segments_tmax'] = -5
    # length of EEG segments in seconds
    config['segments_length'] = 10
    # amount of overlap between EEG sigals in seconds
    config['segments_overlap'] = 2

    # PSD ==============================================
    # Spectral estimation method
    config['psd_method'] = "welch"
    # amount of overlap between windows in Welch's method
    config['psd_n_overlap'] = 1
    config['psd_n_fft'] = 2
    # number of samples in psd
    config["psd_n_per_seg"] = 2

    # fooof analysis configurations ==============================================
    # Desired frequency range to run FOOOF
    config['fooof_freqRangeLow'] = 3
    config['fooof_freqRangeHigh'] = 40
    # which mode should be used for fitting; choices (knee, fixed)
    config["aperiodic_mode"] = "knee"
    # minimum acceptable peak width in fooof analysis
    config["fooof_peak_width_limits"] = [1.0, 12.0]
    #Absolute threshold for detecting peaks
    config['fooof_min_peak_height'] = 0
    #Relative threshold for detecting peaks
    config['fooof_peak_threshold'] = 2

    # feature extraction ==========================================================
    # Define frequency bands
    config['freq_bands'] = {
                            'Theta': (3, 8),
                            'Alpha': (8, 13),
                            'Beta': (13, 30),
                            'Gamma': (30, 40),
                            # 'Broadband': (3, 40)
                            }

    # Define individualized frequency range over main peaks in each freq band
    config['individualized_band_ranges'] = { 
                                            'Theta': (-2, 3),
                                            'Alpha': (-2, 3), # change to (-4,2)
                                            'Beta': (-8, 9),
                                            'Gamma': (-5, 5)
                                            }

    # least acceptable R squred of fitted models
    config['min_r_squared'] = 0.9 
 
    config['feature_categories'] = {
                                    "Offset":False,
                                    "Exponent":False,
                                    "Peak_Center":True,
                                    "Peak_Power":True,
                                    "Peak_Width":True,
                                    "Adjusted_Canonical_Relative_Power":True, 
                                    "Adjusted_Canonical_Absolute_Power":False,
                                    "Adjusted_Individualized_Relative_Power":False,
                                    "Adjusted_Individualized_Absolute_Power":False,
                                    "OriginalPSD_Canonical_Relative_Power":True, 
                                    "OriginalPSD_Canonical_Absolute_Power":False,
                                    "OriginalPSD_Individualized_Relative_Power":False,
                                    "OriginalPSD_Individualized_Absolute_Power":False,
                                    }
    
    config["fooof_res_save_path"] = None

    config["random_state"] = 97#change? 

    if path is not None:
        out_file = open(os.path.join(path, project + ".json"), "w") 
        json.dump(config, out_file, indent = 6) 
        out_file.close()

    return config

Define directories and job specifications needed for feature extraction and NM

In [5]:
project = "Thesis"

# Everything produced by this project lives under project_dir.
project_dir = f'/home/{username}/Results/{project}/'

# Entry script that each submitted feature-extraction job runs.
mainParallel_path = os.path.join(package_path, 'src', 'mainParallel.py')

# Feature-extraction outputs, with log and scratch folders beneath.
features_dir = os.path.join(project_dir, 'Features')
features_log_path = os.path.join(features_dir, 'log')
features_temp_path = os.path.join(features_dir, 'temp')

# Normative-modelling folder for the active run.
nm_processing_dir = os.path.join(project_dir, 'NM', f'Run_{run}')

# Scheduler settings passed to submit_jobs.
job_configs = dict(
    log_path=features_log_path,
    module='mne',
    time='1:00:00',
    memory='20GB',
    partition='normal',
    core=1,
    node=1,
    batch_file_name='batch_job',
)

# Create the working folders if they are not there yet.
os.makedirs(features_log_path, exist_ok=True)
os.makedirs(features_temp_path, exist_ok=True)
os.makedirs(nm_processing_dir, exist_ok=True)

# Build the configuration and save it as <project_dir>/<project>.json.
configs = make_config(project, project_dir)

subjects = merge_datasets_with_glob(datasets)


Parallel Feature Extraction

In [None]:
### Parallel feature extraction

# Configuration file written by make_config for this project.
config_file = os.path.join(project_dir, project + '.json')

# Running jobs for every subject
start_time = submit_jobs(mainParallel_path, features_dir, subjects,
                features_temp_path, job_configs=job_configs, config_file=config_file)
# Checking jobs
failed_jobs = check_jobs_status(username, start_time)

# Keep resubmitting until nothing fails.
# NOTE(review): there is no retry cap, so a subject that fails deterministically
# will be resubmitted forever — consider bounding the number of rounds.
while len(failed_jobs) > 0:
    # Rebuild the resubmission set from the *latest* failures on every round;
    # computing it once before the loop would keep resubmitting subjects that
    # have already succeeded.
    failed_subjects = {failed_job: subjects[failed_job] for failed_job in failed_jobs}
    # Re-running jobs
    start_time = submit_jobs(mainParallel_path, features_dir, failed_subjects,
                features_temp_path, job_configs=job_configs, config_file=config_file)
    # Checking jobs
    failed_jobs = check_jobs_status(username, start_time)

Collect the results, merge features and covariates, and create train/test data for NM

In [6]:
# Gather the outputs of the feature-extraction jobs under the name 'all_features'.
# NOTE(review): presumably reads from features_temp_path and writes into features_dir — confirm in utils.parallel.
collect_results(features_dir, subjects, features_temp_path, file_name='all_features')

In [23]:
# Create participants_bids.tsv for the CMI dataset from its covariates file.
# Each dict argument describes one output column: its name, the source column
# index ("col_id"), an optional value mapping, and an optional constant
# ("single_value") used when there is no source column.
# NOTE(review): the two bare integers (0, 1) are presumably the column indices
# of the participant id and age — confirm against make_demo_file_bids.
file_dir1 = "/project/meganorm/Data/EEG_CMI/EEG_BIDS/covariates.tsv"
save_dir1 = "/project/meganorm/Data/EEG_CMI/EEG_BIDS/participants_bids.tsv"
make_demo_file_bids(file_dir1, 
                        save_dir1, 
                        0, 
                        1, 
                        {"col_name": "sex", "col_id": 2, "mapping":{0: "Male", 1: "Female"}, "single_value":None}, 
                        {"col_name": "site", "col_id": 3, "mapping":{0: "0", 1: "1", 2: "2", 3: "3", 4: "4"}, "single_value":None},
                        {"col_name": "eyes", "col_id": None, "mapping": None, "single_value":"eyes_closed"},
                        # Diagnosis labels are normalised to lower case without punctuation;
                        # "No Diagnosis Given" becomes "control".
                        {"col_name": "diagnosis", "col_id": 4, "mapping": {
                            "ADHD-Combined Type": "adhd combined type",
                            "Generalized Anxiety Disorder": "generalized anxiety disorder",
                            "ADHD-Inattentive Type": "adhd inattentive type",
                            "Specific Learning Disorder with Impairment in Reading": "specific learning disorder with impairment in reading",
                            "Disruptive Mood Dysregulation Disorder": "disruptive mood dysregulation disorder",
                            "Oppositional Defiant Disorder": "oppositional defiant disorder",
                            "Major Depressive Disorder": "major depressive disorder",
                            "Tourettes Disorder": "tourettes disorder",
                            "Other Specified Anxiety Disorder": "other specified anxiety disorder",
                            "Other Specified Attention-Deficit/Hyperactivity Disorder": "other specified attention deficit hyperactivity disorder",
                            "No Diagnosis Given": "control",
                            "Autism Spectrum Disorder": "autism spectrum disorder",
                            "Language Disorder": "language disorder",
                            "Specific Learning Disorder with Impairment in Mathematics": "specific learning disorder with impairment in mathematics",
                            "No Diagnosis Given: Incomplete Eval": "no diagnosis given incomplete eval",
                            "Separation Anxiety": "separation anxiety",
                            "Social (Pragmatic) Communication Disorder": "social pragmatic communication disorder",
                            "Provisional Tic Disorder": "provisional tic disorder",
                            "Social Anxiety (Social Phobia)": "social anxiety social phobia",
                            "Specific Phobia": "specific phobia",
                            "Borderline Intellectual Functioning": "borderline intellectual functioning",
                            "ADHD-Hyperactive/Impulsive Type": "adhd hyperactive impulsive type",
                            "Intellectual Disability-Moderate": "intellectual disability moderate",
                            "Intellectual Disability-Mild": "intellectual disability mild",
                            "Adjustment Disorders": "adjustment disorders",
                            "Bipolar I Disorder": "bipolar i disorder",
                            "Obsessive-Compulsive Disorder": "obsessive compulsive disorder",
                            "Conduct Disorder-Childhood-onset type": "conduct disorder childhood onset type",
                            "Selective Mutism": "selective mutism",
                            "Other Specified Depressive Disorder": "other specified depressive disorder",
                            "Unspecified Attention-Deficit/Hyperactivity Disorder": "unspecified attention deficit hyperactivity disorder",
                            "Other Specified Disruptive, Impulse-Control, and Conduct Disorder": "other specified disruptive impulse control and conduct disorder",
                            "Persistent Depressive Disorder (Dysthymia)": "persistent depressive disorder dysthymia",
                            "Other Specified Trauma- and Stressor-Related Disorder": "other specified trauma and stressor related disorder",
                            "Other Specified Tic Disorder": "other specified tic disorder",
                            "Posttraumatic Stress Disorder": "posttraumatic stress disorder",
                            "Excoriation (Skin-Picking) Disorder": "excoriation skin picking disorder",
                            "Substance/Medication-Induced Bipolar and Related Disorder": "substance medication induced bipolar and related disorder",
                            "Specific Learning Disorder with Impairment in Written Expression": "specific learning disorder with impairment in written expression",
                            "Enuresis": "enuresis",
                            "Major Neurocognitive Disorder Due to Epilepsy": "major neurocognitive disorder due to epilepsy",
                            "Speech Sound Disorder": "speech sound disorder",
                            "Encopresis": "encopresis",
                            "Bipolar II Disorder": "bipolar ii disorder",
                            "Intermittent Explosive Disorder": "intermittent explosive disorder",
                            "Persistent (Chronic) Motor or Vocal Tic Disorder": "persistent chronic motor or vocal tic disorder",
                            "Other Specified Neurodevelopmental Disorder": "other specified neurodevelopmental disorder",
                            "Unspecified Anxiety Disorder": "unspecified anxiety disorder",
                            "Other Specified Feeding or Eating Disorder": "other specified feeding or eating disorder",
                            "Cannabis Use Disorder": "cannabis use disorder",
                            "Bulimia Nervosa": "bulimia nervosa",
                            "Avoidant/Restrictive Food Intake Disorder": "avoidant restrictive food intake disorder",
                            # a single-space entry in the source column is labelled "unspecified"
                            " ": "unspecified",
                            "Reactive Attachment Disorder": "reactive attachment disorder",
                            "Unspecified Neurodevelopmental Disorder": "unspecified neurodevelopmental disorder",
                            "Agoraphobia": "agoraphobia",
                            "Depressive Disorder Due to Another Medical Condition": "depressive disorder due to another medical condition",
                            "Delirium due to another medical condition": "delirium due to another medical condition",
                            # same label as above but with a trailing space in the key
                            "Specific Learning Disorder with Impairment in Reading ": "specific learning disorder with impairment in reading",
                            "Cyclothymic Disorder": "cyclothymic disorder",
                            "Schizophrenia": "schizophrenia",
                            "Delirium due to multiple etiologies": "delirium due to multiple etiologies",
                            "Gender Dysphoria in Adolescents and Adults": "gender dysphoria in adolescents and adults",
                            "Other Specified Obsessive-Compulsive and Related Disorder": "other specified obsessive compulsive and related disorder",
                            "Developmental Coordination Disorder": "developmental coordination disorder",
                            "Acute Stress Disorder": "acute stress disorder"}, "single_value":None})

# Create participants_bids.tsv for the TDBrain dataset.
# No "site" column is requested here (unlike the CMI call); "eyes" is a constant.
# NOTE(review): the bare integers (0, 10) are presumably the participant-id and
# age column indices — confirm against make_demo_file_bids.
file_dir2 = "/project/meganorm/Data/EEG_TDBrain/EEG/TDBRAIN_participants_V2.tsv"
save_dir2 = "/project/meganorm/Data/EEG_TDBrain/EEG/participants_bids.tsv"
make_demo_file_bids(file_dir2, 
                        save_dir2, 
                        0, 
                        10, 
                        {"col_name": "sex", "col_id": 11, "mapping": {1.0: "Male", 0.0: "Female"}, "single_value":None},
                        {"col_name": "eyes", "col_id": None, "mapping": None, "single_value":"eyes_closed"},
                        # Diagnosis labels are lower-cased; "HEALTHY" becomes "control".
                        # Keys with a trailing space or different casing map onto the same label.
                        {"col_name": "diagnosis", "col_id": 3, "mapping": {
                            "UNKNOWN": "unknown", 
                            "REPLICATION": "replication", "BURNOUT": "burnout",  "SMC": "smc", 
                            "HEALTHY": "control", "Dyslexia": "dyslexia", "CHRONIC PAIN": "chronic pain", 
                            "MDD": "mdd", "nan": "nan", "ADHD": "adhd", 
                            "ADHD/ASPERGER": "adhd/asperger", "PDD NOS/DYSLEXIA": "pdd nos/dyslexia", 
                            "PDD NOS": "pdd nos", "WHIPLASH": "whiplash", "ANXIETY": "anxiety",
                            "ADHD/DYSLEXIA": "adhd/dyslexia", "ASD": "asd", "TINNITUS": "tinnitus",
                            "OCD": "ocd", "Tinnitus": "tinnitus", "PDD NOS ": "pdd nos", "PANIC": "panic",
                            "MDD/ANXIETY": "mdd/anxiety", "MIGRAINE": "migraine", "PDD NOS/ANXIETY": "pdd nos/anxiety",
                            "PARKINSON": "parkinson",  "BIPOLAR": "bipolar",  "MDD/bipolar": "mdd/bipolar",
                            "DYSPRAXIA": "dyspraxia", "TINNITUS/MDD": "tinnitus/mdd", "ADHD/ASD/ANXIETY": "adhd/asd/anxiety",
                            "MDD/ADHD": "mdd/adhd",  "ADHD/PDD NOS": "adhd/pdd nos", "MDD/BIPOLAR": "mdd/bipolar",
                            "ASPERGER": "asperger", "ADHD/EPILEPSY": "adhd/epilepsy", "MDD/PAIN": "mdd/pain",
                            "PDD NOS/GTS": "pdd nos/gts",  "PDD NOS/ADHD": "pdd nos/adhd", "PDD NOS/ASD": "pdd nos/asd",
                            "TBI": "tbi", "ADHD/ANXIETY": "adhd/anxiety",  "ADHD/DYSLEXIA/DYSCALCULIA": "adhd/dyslexia/dyscalculia",
                            "ADHD/MDD": "adhd/mdd", "MDD/PANIC": "mdd/panic", "DEPERSONALIZATION": "depersonalization",
                            "MDD/TRAUMA": "mdd/trauma", "PTSD/ADHD": "ptsd/adhd",  "OCD/DPS": "ocd/dps","MDD/OCD": "mdd/ocd",
                            "MDD/TUMOR": "mdd/tumor", "ADHD/GTS": "adhd/gts", "OCD/MDD": "ocd/mdd", "CONVERSION DX": "conversion dx",
                            "ASD/ASPERGER": "asd/asperger", "MDD/ADHD/LYME": "mdd/adhd/lyme", "ADHD/OCD": "adhd/ocd",
                            "MSA-C": "msa-c", "OCD/ASD": "ocd/asd", "STROKE/PAIN": "stroke/pain",
                            "STROKE ": "stroke", "MDD/OCD/ADHD": "mdd/ocd/adhd",  "EPILEPSY/OCD": "epilepsy/ocd",
                            "ADHD ": "adhd", "INSOMNIA": "insomnia", "MDD/ADHD/ANOREXIA": "mdd/adhd/anorexia",
                            "MDD/ANXIETY/TINNITUS": "mdd/anxiety/tinnitus"}, "single_value":None})


# Create participants_bids.tsv for the MIPDB dataset.
# Sex is coded 1.0/2.0 here (the other datasets use 0/1); "eyes" is a constant.
# NOTE(review): the bare integers (0, 1) are presumably the participant-id and
# age column indices — confirm against make_demo_file_bids.
file_dir3 = "/project/meganorm/Data/EEG_MIPDB/info/MIPDB_PublicFile.csv"
save_dir3 = "/project/meganorm/Data/EEG_MIPDB/EEG_BIDS/participants_bids.tsv"
make_demo_file_bids(file_dir3, 
                    save_dir3, 
                    0, 
                    1, 
                    {"col_name": "sex", "col_id": 2, "mapping": {1.0: "Male", 2.0: "Female"}, "single_value":None},
                    {"col_name": "eyes", "col_id": None, "mapping": None, "single_value":"eyes_closed"},
                    # Code 0 is treated as control; codes 1 and 2 keep placeholder labels.
                    {"col_name": "diagnosis", "col_id": 37, "mapping": {
                        0: "control",
                        1: "unknown_1",
                        2: "unknown_2"}, "single_value":None})


In [24]:
### Data preparation for Normative Modeling

# Dataset names and their root folders, in the order declared in `datasets`.
dataset_names = list(datasets)
base_dirs = [datasets[name]["base_dir"] for name in dataset_names]

# Merge extracted features with the demographics of every dataset.
merged_data, data_patient = merge_fidp_demo(
    base_dirs, features_dir, dataset_names, include_patients=False
)

# Write the 50/50 train/test files for HBR into the run folder,
# using the seed selected by `run`.
biomarker_num = hbr_data_split(
    merged_data,
    nm_processing_dir,
    drop_nans=True,
    batch_effects=['sex', 'site'],
    random_seed=random_seeds[run],
    train_split=0.50,
)
print("merged_data:", merged_data['site'])


demo:                   age     sex         eyes     diagnosis
participant_id                                          
sub-19681349    51.59  Female  eyes_closed   replication
sub-19681385    49.96    Male  eyes_closed   replication
sub-19684666    47.05    Male  eyes_closed   replication
sub-19686324    62.51    Male  eyes_closed   replication
sub-19687321    53.98    Male  eyes_closed   replication
sub-19687321    54.34    Male  eyes_closed   replication
sub-19687396    43.66  Female  eyes_closed   replication
sub-19690494    55.72  Female  eyes_closed   replication
sub-19690494    56.10  Female  eyes_closed   replication
sub-19690969    44.13    Male  eyes_closed   replication
sub-19690969    44.50    Male  eyes_closed   replication
sub-19691048    41.72    Male  eyes_closed   replication
sub-19691048    41.97    Male  eyes_closed   replication
sub-19691782    58.40  Female  eyes_closed   replication
sub-19693739    23.19  Female  eyes_closed   replication
sub-19694366    41.22  Fe

                       age  sex  \
participant_id                    
sub-19681349      0.515900    0   
sub-19681385      0.499600    1   
sub-19684666      0.470500    1   
sub-19686324      0.625100    1   
sub-19687321      0.539800    1   
sub-19687321      0.543400    1   
sub-19687396      0.436600    0   
sub-19690494      0.557200    0   
sub-19690494      0.561000    0   
sub-19690969      0.441300    1   
sub-19690969      0.445000    1   
sub-19691048      0.417200    1   
sub-19691048      0.419700    1   
sub-19691782      0.584000    0   
sub-19693739      0.231900    0   
sub-19694366      0.412200    0   
sub-19694625      0.192400    1   
sub-19694625      0.196300    1   
sub-19695359      0.598100    1   
sub-19696546      0.508300    0   
sub-19696870      0.557300    0   
sub-19696870      0.561200    0   
sub-19696913      0.398100    0   
sub-19697464      0.487700    0   
sub-19697464      0.492900    0   
sub-19698310      0.440100    0   
sub-19698421      0.

In [None]:
import pandas as pd

# MIPDB participants table written by make_demo_file_bids; loaded for manual inspection.
file =  "/project/meganorm/Data/EEG_MIPDB/EEG_BIDS/participants_bids.tsv"
df = pd.read_csv(file, sep = '\t')
#print(df)

# Lift the row limit for this print only. A global pd.set_option would make
# every later cell dump complete frames as well.
with pd.option_context('display.max_rows', None):
    print(merged_data)

Specify the configurations and job specifications for NM

In [9]:
### Setting up NM configs

# Python interpreter handed to the NM batch jobs.
python_path = '/project/meganorm/Software/Miniconda3/envs/mne/bin/python'

# Eight HBR variants named '<variance>_<family>_<basis>'.
# Flags are stored as the strings 'True' / 'False':
#   linear_sigma                 -> 'True' for every 'hetero' variant
#   linear_epsilon, linear_delta -> 'True' only for 'hetero' + 'SHASH'
#   random_slope_mu              -> always 'False'
hbr_configs = {
    f'{variance}_{family}_{basis}': {
        'model_type': basis,
        'likelihood': 'Normal' if family == 'Gaussian' else 'SHASHb',
        'linear_sigma': str(variance == 'hetero'),
        'random_slope_mu': 'False',
        'linear_epsilon': str(variance == 'hetero' and family == 'SHASH'),
        'linear_delta': str(variance == 'hetero' and family == 'SHASH'),
    }
    for variance in ('homo', 'hetero')
    for family in ('Gaussian', 'SHASH')
    for basis in ('linear', 'bspline')
}

# Scaling options and batching passed through to execute_nm.
inscaler, outscaler = 'None', 'None'
batch_size = 1
outputsuffix = '_estimate'

# Train / test files inside the run folder: responses (y_*), covariates (x_*)
# and batch effects (b_*).
respfile, covfile = (
    os.path.join(nm_processing_dir, fname) for fname in ('y_train.pkl', 'x_train.pkl')
)
testrespfile_path, testcovfile_path = (
    os.path.join(nm_processing_dir, fname) for fname in ('y_test.pkl', 'x_test.pkl')
)
trbefile, tsbefile = (
    os.path.join(nm_processing_dir, fname) for fname in ('b_train.pkl', 'b_test.pkl')
)

# Resources requested per NM batch job and the scheduler type.
memory, duration, cluster_spec = '2gb', '5:00:00', 'slurm'

Run the normative modeling

In [None]:
### Running NM

#for method in hbr_configs.keys():
method = 'hetero_SHASH_bspline'
processing_dir = os.path.join(nm_processing_dir, method) + '/'
nm_log_path = os.path.join(processing_dir, 'log') + '/'

# Make sure the model folder and its log folder exist.
os.makedirs(processing_dir, exist_ok=True)
os.makedirs(nm_log_path, exist_ok=True)

# Submit the HBR estimation. The six model flags come straight from the
# selected hbr_configs entry, unpacked at the same keyword position.
execute_nm(
    processing_dir, python_path, 'NM', covfile, respfile, batch_size, memory, duration,
    alg='hbr',
    log_path=nm_log_path,
    binary=True,
    testcovfile_path=testcovfile_path,
    testrespfile_path=testrespfile_path,
    trbefile=trbefile,
    tsbefile=tsbefile,
    **hbr_configs[method],
    savemodel='True',
    inscaler=inscaler,
    outscaler=outscaler,
    outputsuffix=outputsuffix,
    interactive='auto',
    cluster_spec=cluster_spec,
    nuts_sampler="nutpie",
    n_cores_per_batch="2",
)

Estimate the centiles and plot them 

In [None]:
## Estimate centiles 
method = 'hetero_SHASH_bspline'
processing_path = os.path.join(nm_processing_dir, method)

q = estimate_centiles(processing_path, biomarker_num, quantiles=[0.05, 0.25, 0.5, 0.75, 0.95],
                          batch_map={0:{'Male':0, 'Female':1}, 1:{'TDBrain':0, 'MIPDB':1, 'CMI1':2, 'CMI2':3, 'CMI3':4}}, 
                          age_range=[5, 25])

In [None]:
## Plot ranges 
method = 'hetero_SHASH_bspline'
processing_path = os.path.join(nm_processing_dir, method)

plot_nm_range_site2(processing_path, nm_processing_dir, batch_marker={"site":['TDBrain', 'CMI1', "CM2", "CMI3"]})

In [None]:
# Figure created for OHBM: original and adjusted relative power of the theta, alpha, beta and gamma frequency bands.
# NOTE(review): the path is hard-coded to the 'CMI' results folder, not the current `project_dir`.
from PIL import Image
from IPython.display import display
img = Image.open('/home/meganorm-yverduyn/Results/CMI/NM/Run_0/hetero_SHASH_bspline/Figures_experiment0/FINAL_OHBM.jpg')
display(img)

Compute metrics

In [None]:
# MACE: evaluate every fitted model on the held-out test files.
# NOTE(review): all four paths are hard-coded to the 'CMI' results folder and Run_0,
# not derived from `project_dir` / `nm_processing_dir`.
from utils.nm import evaluate_mace
model_path = "/home/meganorm-yverduyn/Results/CMI/NM/Run_0/hetero_SHASH_bspline/Models"
X_path = "/home/meganorm-yverduyn/Results/CMI/NM/Run_0/x_test.pkl"
y_path = "/home/meganorm-yverduyn/Results/CMI/NM/Run_0/y_test.pkl"
be_path = "/home/meganorm-yverduyn/Results/CMI/NM/Run_0/b_test.pkl"

# Collected (model_id, MACE) pairs
mace_results = []

# Loop through model IDs 0 to 19 (20 models)
for model_id in range(0, 20):
    # Call the evaluate_mace function
    # NOTE(review): outputsuffix is 'estimate' here but '_estimate' in the NM config cell — confirm which one evaluate_mace expects.
    mace_result = evaluate_mace(
        model_path=model_path,
        X_path=X_path,
        y_path=y_path,
        be_path=be_path,
        model_id=model_id,
        quantiles=[0.05, 0.25, 0.5, 0.75, 0.95],
        plot=False,
        outputsuffix='estimate'
    )
    mace_results.append((model_id, mace_result))

    # Print progress
    print(f"Model {model_id}: MACE = {mace_result}")

In [None]:
# SHAPIRO: run shapiro_stat on the test-set z-scores, using 3 bins.
# NOTE(review): both paths are hard-coded to the 'CMI' results folder and Run_0.
import pickle
file_path = "/home/meganorm-yverduyn/Results/CMI/NM/Run_0/hetero_SHASH_bspline/Z_estimate.pkl"
# Z-scores written by the NM estimation (only load pickles you produced yourself).
with open(file_path, 'rb') as file:
    z_scores = pickle.load(file)

print(z_scores)
# Test-set covariates saved alongside the z-scores' run folder.
covariates_path = "/home/meganorm-yverduyn/Results/CMI/NM/Run_0/x_test.pkl"
with open(covariates_path, 'rb') as file:
    covariates = pickle.load(file)

from utils.nm import shapiro_stat
shapiro_stat(z_scores, covariates, n_bins=3)

In [None]:
# Plot age distribution for different sites and train/test/validation partitions.
# The path must be an f-string: without the prefix the literal text
# "{username}" and "{project}" was passed on instead of the actual folder.
base_dir = f"/home/{username}/Results/{project}/NM"
plot_age_dist2(base_dir)

TEST ON CLINICAL DATA

In [None]:
# Disabled cell: the whole body is wrapped in a string literal, so none of it runs.
# When enabled it writes 'clinicalpredict_'-prefixed test files from data_patient
# and calls execute_nm with func="predict" and outputsuffix="clinicalpredict".
# NOTE(review): `prepare_test_data` is not imported in the import cell, and the
# abnormality cell below reads Z_clinicalpredict.pkl, presumably produced by
# this step — confirm before relying on either.
"""
random_seeds = [0]
method = 'hetero_SHASH_bspline'
processing_dir = os.path.join(nm_processing_dir, method) + '/'
nm_log_path = os.path.join(processing_dir, 'log') + '/'

for i in range(len(random_seeds)):

    nm_processing_dir_temp = nm_processing_dir.replace("Run_0", f"Run_{i}")
    processing_dir_temp = processing_dir.replace("Run_0", f"Run_{i}")

    prefix = "clinicalpredict_"
    biomarker_num = prepare_test_data(data_patient.drop('diagnosis', axis=1),
                                nm_processing_dir_temp, 
                                drop_nans=True, 
                                batch_effects=['sex', 'site'], 
                                #train_split=0.0,
                                prefix=prefix)

    testrespfile_path = os.path.join(nm_processing_dir_temp, prefix + 'y_test.pkl')
    testcovfile_path = os.path.join(nm_processing_dir_temp, prefix + 'x_test.pkl')
    tsbefile = os.path.join(nm_processing_dir_temp, prefix + 'b_test.pkl')

    execute_nm(processing_dir_temp, python_path,
            'NM', testcovfile_path, testrespfile_path, batch_size, memory, duration, alg='hbr', 
            log_path=nm_log_path, binary=True, tsbefile=tsbefile, func="predict", 
            model_type=hbr_configs[method]['model_type'], likelihood=hbr_configs[method]['likelihood'],  
            linear_sigma=hbr_configs[method]['linear_sigma'], random_slope_mu=hbr_configs[method]['random_slope_mu'],
            linear_epsilon=hbr_configs[method]['linear_epsilon'], linear_delta=hbr_configs[method]['linear_delta'], 
            savemodel='True', inscaler=inscaler, outscaler=outscaler, outputsuffix="clinicalpredict", inputsuffix=outputsuffix,
            interactive='auto', cluster_spec=cluster_spec, nuts_sampler="nutpie", n_cores_per_batch="2")

"""

ABNORMALITY PROBABILITY INDEX

In [None]:
# One row per run, one column per biomarker: FDR-corrected p-values (df) and AUCs (df_auc).
# NOTE(review): columns[3:] presumably skips the leading non-biomarker columns — confirm.
df = pd.DataFrame(columns=merged_data.columns[3:])
df_auc = pd.DataFrame(columns=merged_data.columns[3:])

adhd_diagnoses = ["adhd combined type", "adhd inattentive type", "adhd hyperactive impulsive type"]

# Loop-invariant work, done once: drop incomplete patient rows and locate the
# ADHD rows by position (z_patient is indexed positionally below).
data_patient = data_patient.dropna(axis=0)
adhd_rows = np.where(data_patient["diagnosis"].isin(adhd_diagnoses))[0]

for i in range(len(random_seeds)):

    # Swap the folder of the *active* run for run i. Replacing a hard-coded
    # "Run_0" silently left the path unchanged whenever `run` was not 0.
    nm_processing_dir_temp = nm_processing_dir.replace(f"Run_{run}", f"Run_{i}")
    processing_dir_temp = processing_dir.replace(f"Run_{run}", f"Run_{i}")

    with open(os.path.join(processing_dir_temp, "Z_clinicalpredict.pkl"), "rb") as file:
        z_patient = pickle.load(file)

    with open(os.path.join(processing_dir_temp,"Z_estimate.pkl"), "rb") as file:
        z_healthy = pickle.load(file)

    with open(os.path.join(nm_processing_dir_temp, "b_test.pkl"), "rb") as file:
        b_healthy = pickle.load(file)

    # Healthy reference: test subjects from site 3 only.
    # NOTE(review): site 3 is 'CMI2' in the batch_map of the centile cell — confirm this is the intended site.
    z_healthy = z_healthy.iloc[np.where(b_healthy["site"]==3)[0], :]

    # Patients: ADHD rows only.
    # NOTE(review): assumes z_patient rows are in the same order as data_patient after dropna — confirm.
    z_patient = z_patient.iloc[adhd_rows, :]

    p_patient = z_to_abnormal_p(z_patient)
    p_healthy = z_to_abnormal_p(z_healthy)

    p = np.concatenate([p_patient, p_healthy])
    print(p.shape)

    # 1 = patient, 0 = healthy
    labels = np.concatenate([np.ones(p_patient.shape[0]), np.zeros(p_healthy.shape[0])])
    print(labels.shape)

    auc, p_val = anomaly_detection_auc(p, labels, n_permutation=1000)

    # Correct the per-biomarker p-values for multiple comparisons.
    p_val = false_discovery_control(p_val)

    df.loc[i] = p_val
    df_auc.loc[i] = auc

AUC BOXPLOT

In [None]:
# Keep the four adjusted relative-power biomarkers and label them by band name.
df_auc_plot = (
    df_auc
    .loc[:, [
        "Adjusted_Canonical_Relative_PowerTheta_all",
        "Adjusted_Canonical_Relative_PowerAlpha_all",
        "Adjusted_Canonical_Relative_PowerBeta_all",
        "Adjusted_Canonical_Relative_PowerGamma_all",
    ]]
    .rename(columns={
        "Adjusted_Canonical_Relative_PowerTheta_all": "Theta",
        "Adjusted_Canonical_Relative_PowerAlpha_all": "Alpha",
        "Adjusted_Canonical_Relative_PowerBeta_all": "Beta",
        "Adjusted_Canonical_Relative_PowerGamma_all": "Gamma",
    })
)

box_plot_auc(df_auc_plot, save_path="")

In [None]:
# Restrict the patient table to the three ADHD subtypes (row order preserved).
data_patient = data_patient[
    data_patient["diagnosis"].isin(
        ["adhd combined type", "adhd inattentive type", "adhd hyperactive impulsive type"]
    ).values
]

print(data_patient.shape)

# Give the z-scores the same participant index as the filtered patient table.
z_patient.index = data_patient.index
# Everything from the fifth column onwards is kept as the feature table.
adhd_patient_feat = data_patient.iloc[:, 4:]
print(adhd_patient_feat.shape)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# No random numbers are drawn in this cell; the seed is kept so that the global
# RNG state seen by any later cell stays the same.
np.random.seed(42)

# Theta (x) and beta (y) z-scores of the ADHD patients.
# NOTE(review): the column position is looked up in adhd_patient_feat and applied to
# z_patient, which assumes both have the same column order — confirm.
a = list(z_patient.iloc[:, np.where(adhd_patient_feat.columns=="Adjusted_Canonical_Relative_PowerTheta_all")[0][0]])
b = list(z_patient.iloc[:, np.where(adhd_patient_feat.columns=="Adjusted_Canonical_Relative_PowerBeta_all")[0][0]])

plt.figure(figsize=(8, 8))

plt.ylim((-4, 4))
plt.xlim((-4, 4))

# Fixed legend order and the single source of truth for label -> colour.
# Previously the legend listed 'Normal beta - High theta' as green while the
# points were drawn in olive; both now use this table (olive, as plotted).
order = [
    ('High beta - Low theta', 'red'),
    ('High theta - Low beta', 'purple'),
    ('High beta - Normal theta', 'blue'),
    ('Normal theta - Low beta', 'orange'),
    ('Normal beta - High theta', 'olive'),
    ('Normal beta - Low theta', 'teal'),
    ('Low beta - Low theta', 'pink'),
    ('High beta - High theta', 'mediumvioletred'),
    ('Normal range', 'black')
]
color_by_label = dict(order)

# Classify every patient by where theta and beta fall relative to |z| = 0.68.
# Values exactly on a threshold match no strict comparison and fall through
# to 'Normal range'.
labels = []
for theta, beta in zip(a, b):
    if beta > 0.68 and theta < -0.68:
        label = 'High beta - Low theta'
    elif theta > 0.68 and beta < -0.68:
        label = 'High theta - Low beta'
    elif beta > 0.68 and -0.68 < theta < 0.68:
        label = 'High beta - Normal theta'
    elif -0.68 < theta < 0.68 and beta < -0.68:
        label = 'Normal theta - Low beta'
    elif -0.68 < beta < 0.68 and theta > 0.68:
        label = 'Normal beta - High theta'
    elif -0.68 < beta < 0.68 and theta < -0.68:
        label = 'Normal beta - Low theta'
    elif beta < -0.68 and theta < -0.68:
        label = 'Low beta - Low theta'
    elif beta > 0.68 and theta > 0.68:
        label = 'High beta - High theta'
    else:
        label = 'Normal range'
    labels.append(label)

# Point colours come from the same table as the legend.
colors = [color_by_label[label] for label in labels]

# Create the legend handles in the correct order
handles = []
for label, color in order:
    handles.append(plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=10, label=label))

# Plot the scatter plot
plt.scatter(a, b, color=colors)

# Add the gray region and lines
plt.fill_betweenx(y=[-0.68, 0.68], x1=-0.68, x2=0.68, color='gray', alpha=0.5, label="|z| < 0.68")
plt.hlines(y=[-0.68, 0.68], xmin=-0.68, xmax=0.68, colors='black', linestyles='--', linewidth=1.5)
plt.vlines(x=[-0.68, 0.68], ymin=-0.68, ymax=0.68, colors='black', linestyles='--', linewidth=1.5)

# Set axis ticks
ticks = [-3, -0.68, 0, 0.68, 3]
plt.xticks(ticks)
plt.yticks(ticks)

# Labeling
plt.xlabel('Theta z-scores', fontsize=16)
plt.ylabel('Beta z-scores', fontsize=16)

# Style the plot
plt.grid(alpha=0.5)
plt.gca().spines["right"].set_visible(False)
plt.gca().spines["top"].set_visible(False)
plt.gca().spines["left"].set_visible(False)
plt.gca().spines["bottom"].set_visible(False)

# Add the legend with the correct order
plt.legend(handles=handles, fontsize=13)

# Finalize and save the plot
plt.tight_layout()
plt.savefig("normal_var.png", dpi=400)