In [None]:
# Cluster account name; it is interpolated into the home-directory paths
# used for the code checkout and for the results.
username = 'meganorm-smkia'

# Dataset label -> directory handed to merge_datasets.
# NOTE(review): the CAMCAN entry lives under the BTNRH tree, while the CamCAN
# covariates are read from /project/meganorm/Data/camcan/... in the data
# preparation cell -- confirm this location is intended.
datasets = {
    'BTNRH': '/project/meganorm/Data/BTNRH/BTNRH/BIDS_data/',
    'CAMCAN': '/project/meganorm/Data/BTNRH/CAMCAN/BIDS_data/',
}

# Root of the MEGaNorm checkout; the import cell chdir's here before importing.
package_path = '/home/' + username + '/Code/MEGaNorm/'

In [None]:
import os
os.chdir(package_path)
from utils.parallel import submit_jobs, check_jobs_status, collect_results
from datasets.camcan import load_camcan_data
from datasets.btnrh import load_BTNRH_data
from utils.nm import hbr_data_split, estimate_centiles
from plots.plots import plot_nm_range_site, plot_comparison, plot_neurooscillochart, plot_age_dist2
from utils.nm import model_quantile_evaluation, calculate_oscilochart
from utils.IO import make_config, merge_datasets
import pandas as pd
from pcntoolkit.normative_parallel import execute_nm
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Project identity and the root directory that receives every output.
project = "BIOMAG2024"
project_dir = f'/home/{username}/Results/{project}/'

# Script handed to submit_jobs for the feature-extraction jobs.
mainParallel_path = os.path.join(package_path, 'src', 'mainParallel.py')

# Feature-extraction outputs, with separate log and temp sub-directories.
features_dir = os.path.join(project_dir, 'Features')
features_log_path = os.path.join(features_dir, 'log')
features_temp_path = os.path.join(features_dir, 'temp')

# Working directory for the normative-modeling stage.
nm_processing_dir = os.path.join(project_dir, 'NM')

# Job settings forwarded to submit_jobs via its job_configs argument.
job_configs = {'log_path':features_log_path, 'module':'mne', 'time':'1:00:00', 'memory':'20GB', 
                'partition':'normal', 'core':1, 'node':1, 'batch_file_name':'batch_job'}

# exist_ok=True makes the cell safely re-runnable and avoids the
# check-then-create race of `if not os.path.isdir(...)`.
os.makedirs(features_log_path, exist_ok=True)
os.makedirs(features_temp_path, exist_ok=True)
os.makedirs(nm_processing_dir, exist_ok=True)

configs = make_config(project, project_dir)

subjects = merge_datasets(datasets)

In [None]:
### Parallel feature extraction

# Submit the feature-extraction jobs for all subjects, then ask which failed.
# NOTE(review): this first submission does not pass config_file, while the
# resubmissions below do -- confirm submit_jobs' default is what is wanted.
start_time = submit_jobs(mainParallel_path, features_dir, subjects, 
                features_temp_path, job_configs=job_configs)
failed_jobs = check_jobs_status(username, start_time)

# Resubmit until a round completes with no failed job.
# NOTE(review): the loop is unbounded; a subject that can never succeed will
# be resubmitted forever.
while len(failed_jobs)>0:
    # Rebuild the subject subset from the *current* failures on every round.
    # Previously this dict was computed once before the loop, so each retry
    # resubmitted the first round's failures instead of the latest ones.
    # Assumes every entry of failed_jobs is a key of `subjects` -- TODO confirm
    # this also holds for the jobs reported after a resubmission.
    failed_subjects = {failed_job:subjects[failed_job] for failed_job in failed_jobs}

    # Re-running Jobs
    start_time = submit_jobs(mainParallel_path, features_dir, failed_subjects, 
                features_temp_path, job_configs=job_configs, config_file=os.path.join(project_dir, project+'.json'))
    # Checking jobs
    failed_jobs = check_jobs_status(username, start_time)

collect_results(features_dir, subjects, features_temp_path, file_name='all_features')

In [None]:
### Data preparation for Normative Modeling

# Feature table under features_dir, plus each site's covariate table.
feature_path = os.path.join(features_dir, "all_features.csv")
BTNRH_cov_path = "/project/meganorm/Data/BTNRH/BTNRH/BIDS_data/participants.tsv"
camcan_cov_path = '/project/meganorm/Data/camcan/CamCAN/cc700/participants.tsv'

# Each loader receives the shared feature file and its own covariate file.
camcan_data = load_camcan_data(feature_path, camcan_cov_path)
BTNRH_data = load_BTNRH_data(feature_path, BTNRH_cov_path)

# Stack both sites row-wise and divide age by 100.
# NOTE(review): presumably this puts the covariate on a unit-like scale for
# the model -- confirm downstream code expects age / 100.
merged_data = (
    pd.concat([BTNRH_data, camcan_data], axis=0)
    .assign(age=lambda frame: frame['age'] / 100)
)

# Split the data into nm_processing_dir, using gender and site as batch effects.
biomarker_num = hbr_data_split(
    merged_data,
    nm_processing_dir,
    drop_nans=True,
    batch_effects=['gender', 'site'],
)

In [None]:
### Setting up NM configs

# Python interpreter passed to execute_nm.
python_path = '/project/meganorm/Software/Miniconda3/envs/mne/bin/python'

# Candidate HBR configurations, keyed
# '<homo|hetero>_<Gaussian|SHASH>_<linear|bspline>'.
# Each table row is (variance label, family label, likelihood, linear_sigma,
# value shared by linear_epsilon and linear_delta). Every row is expanded into
# a 'linear' and a 'bspline' variant. The flags are deliberately the strings
# 'True'/'False', not booleans.
hbr_configs = {
    f'{variance}_{family}_{basis}': {
        'model_type': basis,
        'likelihood': likelihood,
        'linear_sigma': linear_sigma,
        'random_slope_mu': 'False',
        'linear_epsilon': linear_shape,
        'linear_delta': linear_shape,
    }
    for variance, family, likelihood, linear_sigma, linear_shape in (
        ('homo', 'Gaussian', 'Normal', 'False', 'False'),
        ('homo', 'SHASH', 'SHASHb', 'False', 'False'),
        ('hetero', 'Gaussian', 'Normal', 'True', 'False'),
        ('hetero', 'SHASH', 'SHASHb', 'True', 'True'),
    )
    for basis in ('linear', 'bspline')
}

# Scaling, batching and output naming forwarded to execute_nm.
inscaler, outscaler = 'None', 'minmax'
batch_size = 1
outputsuffix = '_estimate'

# Training split inside nm_processing_dir: covariates, responses, batch effects.
# NOTE(review): presumably these pickles are the ones written by
# hbr_data_split -- confirm the file names match.
covfile = os.path.join(nm_processing_dir, 'x_train.pkl')
respfile = os.path.join(nm_processing_dir, 'y_train.pkl')
trbefile = os.path.join(nm_processing_dir, 'b_train.pkl')

# Test split, same layout.
testcovfile_path = os.path.join(nm_processing_dir, 'x_test.pkl')
testrespfile_path = os.path.join(nm_processing_dir, 'y_test.pkl')
tsbefile = os.path.join(nm_processing_dir, 'b_test.pkl')

# Scheduler type and per-job resources passed to execute_nm.
cluster_spec = 'slurm'
memory = '2gb'
duration = '5:00:00'

In [None]:
### Running NM

# Only one configuration is fitted here; restore the loop below to fit all.
#for method in hbr_configs.keys():
method = 'hetero_SHASH_bspline'

# Per-method working directory and its log directory. The trailing '/' is
# appended explicitly, exactly as before.
processing_dir = os.path.join(nm_processing_dir, method) + '/'
nm_log_path = os.path.join(processing_dir, 'log') + '/'

# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of `if not os.path.isdir(...)`.
os.makedirs(processing_dir, exist_ok=True)
os.makedirs(nm_log_path, exist_ok=True)

# Launch the HBR fit; the model options come from the selected hbr_configs entry.
execute_nm(processing_dir, python_path,
            'NM', covfile, respfile, batch_size, memory, duration, alg='hbr', 
            log_path=nm_log_path, binary=True, testcovfile_path=testcovfile_path, 
            testrespfile_path=testrespfile_path,trbefile=trbefile, tsbefile=tsbefile, 
            model_type=hbr_configs[method]['model_type'], likelihood=hbr_configs[method]['likelihood'],  
            linear_sigma=hbr_configs[method]['linear_sigma'], random_slope_mu=hbr_configs[method]['random_slope_mu'],
            linear_epsilon=hbr_configs[method]['linear_epsilon'], linear_delta=hbr_configs[method]['linear_delta'], 
            savemodel='True', inscaler=inscaler, outscaler=outscaler, outputsuffix=outputsuffix, 
            interactive='auto', cluster_spec=cluster_spec)

In [None]:
### Evaluating quantiles using MACE

# The two configurations compared here differ only in random_slope_mu.
# NOTE(review): this rebinds hbr_configs, replacing the dictionary from the
# setup cell; re-running the "Running NM" cell afterwards would pick up
# random_slope_mu='True' for 'hetero_SHASH_bspline'.
# NOTE(review): no cell shown in this notebook fits a model into a
# 'hetero_SHASH_bspline2' directory -- confirm those results exist on disk.
hbr_configs = {
    name: {
        'model_type': 'bspline',
        'likelihood': 'SHASHb',
        'linear_sigma': 'True',
        'random_slope_mu': random_slope_mu,
        'linear_epsilon': 'True',
        'linear_delta': 'True',
    }
    for name, random_slope_mu in (
        ('hetero_SHASH_bspline', 'True'),
        ('hetero_SHASH_bspline2', 'False'),
    )
}

# NOTE(review): outputsuffix is 'estimate' here but '_estimate' in the call to
# execute_nm -- confirm model_quantile_evaluation adds the underscore itself.
mace, best_models, bio_ids = model_quantile_evaluation(
    hbr_configs, nm_processing_dir, testcovfile_path,
    testrespfile_path, tsbefile, biomarker_num,
    plot=False, outputsuffix='estimate',
)

plot_comparison(nm_processing_dir, hbr_configs, biomarker_num)

In [None]:
# Configuration whose centile ranges are plotted (rebinds hbr_configs again).
hbr_configs = {
    'hetero_SHASH_bspline2': dict(
        model_type='bspline',
        likelihood='SHASHb',
        linear_sigma='True',
        random_slope_mu='False',
        linear_epsilon='True',
        linear_delta='True',
    ),
}

## Plotting ranges
for config in hbr_configs:
    processing_path = os.path.join(nm_processing_dir, config)

    # batch_map gives the integer code of each level for batch effect 0
    # (gender) and batch effect 1 (site).
    # NOTE(review): age_range is written in years, but age was divided by 100
    # in the data preparation cell -- confirm estimate_centiles handles this.
    q = estimate_centiles(
        processing_path,
        biomarker_num,
        quantiles=[0.05, 0.25, 0.5, 0.75, 0.95],
        batch_map={
            0: {'Male': 0, 'Female': 1},
            1: {'CAMCAN': 0, 'BTNRH': 1},
        },
        age_range=[6, 85],
    )
    plot_nm_range_site(processing_path, nm_processing_dir, experiment_id=2)

# Experiment ids used so far:
# exp 0: ap - periodic > 0 => 0 and hetero_SHASH_bspline
# exp 1: hetero_SHASH_bspline
# exp 2: ap - periodic > 0 => 0 and hetero_SHASH_bspline and -inf are removed

In [None]:
# Calculating oscillograms

# Models directory of the fitted 'hetero_SHASH_bspline' run. It is derived from
# nm_processing_dir instead of repeating the results root and project name, so
# it follows any change to `project` or `username`; with the current settings
# it resolves to the same path as before.
model_path = os.path.join(nm_processing_dir, 'hetero_SHASH_bspline', 'Models')

# Integer codes passed to calculate_oscilochart.
# NOTE(review): the band ids are presumably the indices of the fitted
# per-biomarker models -- confirm against the feature ordering.
gender_ids = {'Male':0, 'Female':1}
frequency_band_model_ids = {'Theta':6, 'Alpha':3, 'Beta':4, 'Gamma':5}

oscilograms = calculate_oscilochart(model_path, gender_ids, frequency_band_model_ids)

plot_neurooscillochart(oscilograms, nm_processing_dir)

In [None]:
### Running Freesurfer on T1 Anatomical data for source reconstruction

from utils.freesurfer import run_parallel_reconall

# Input T1 images, FreeSurfer installation and output location.
mri_directory = '/project/meganorm/Data/camcan/CamCAN/cc700/mri/pipeline/release004/BIDS_20190411/anat'
freesurfer_path = '/project/meganorm/Software/freesurfer'
results_directory = '/project/meganorm/Data/camcan/CamCAN/cc700/mri/pipeline/release004/BIDS_20190411/derivatives'

# Working directory in the current user's home. It is built from `username`
# like every other home path in this notebook, instead of hard-coding one
# account name; with the current setting it resolves to the same path.
# NOTE(review): this rebinds processing_dir, which the "Running NM" cell uses
# for the per-method NM directory.
processing_dir = f'/home/{username}/temp'

subject_ids = run_parallel_reconall(mri_directory, results_directory, processing_dir, freesurfer_path)


In [None]:
## Re-running the failed recon-all jobs. Run this cell only after all the
## jobs submitted by the previous cell have finished.
from utils.freesurfer import check_log_for_success
from utils.freesurfer import rerun_failed_subs

# Find the subjects reported as unsuccessful, then resubmit exactly those.
failed_jobs = check_log_for_success(results_directory, subject_ids)
rerun_failed_subs(
    failed_jobs,
    mri_directory,
    results_directory,
    processing_dir,
    freesurfer_path,
)


In [None]:
# QC on freesurfer results

from utils.freesurfer import freesurfer_QC

# freesurfer_QC returns three collections: passed, failed and missing samples.
(
    qc_passed_samples,
    qc_failed_samples,
    missing_samples,
) = freesurfer_QC(results_directory)

In [None]:
# Plot the age distribution for different sites and train/test/validation partitions.
# The base directory is this notebook's own NM directory; it used to be a
# hard-coded path inside a different user's home, so the plot did not reflect
# the splits produced above.
base_dir = nm_processing_dir
plot_age_dist2(base_dir, val=False)