In [1]:
# Cluster account name: the per-user /home/<username>/... paths used in this
# notebook are built from it, and it is passed to check_jobs_status.
username = "meganorm-mznasrabadi"

In [2]:
import os

# Root of this user's MEGaNorm checkout.
package_path = f'/home/{username}/MEGaNorm/'
# Change the working directory to the package root BEFORE the project-local
# imports below. Presumably `utils`, `datasets` and `plots` are resolved
# relative to the working directory -- TODO confirm against the kernel's
# sys.path; do not move these imports above the chdir.
os.chdir(package_path)

from utils.parallel import submit_jobs, check_jobs_status, collect_results
from datasets.camcan import load_camcan_data
from utils.nm import hbr_data_split
from plots.plots import plot_nm_range2

# import pcntoolkit as ptk

In [3]:
### Paths and configs

# Script that submit_jobs is given as the per-subject entry point.
mainParallel_path = os.path.join(package_path, 'src', 'mainParallel.py')

# Layout JSON located under the package's "layouts" folder.
layout_path = os.path.join(package_path, "layouts", "Megin_MAG_All.json")

# Input recordings (one directory entry per subject) and the feature output folder.
data_path = '/project/meganorm/Data/camcan/CamCAN/cc700/meg/pipeline/release005/BIDSsep/derivatives_rest/aa/AA_movecomp_transdef/aamod_meg_maxfilt_00003/'
target_dir = f'/home/{username}/Data/CamCAN/Features/'

# Per-user scratch locations: job logs and temporary per-job files.
log_path = f'/home/{username}/temp/log/'
temp_path = f'/home/{username}/temp/tmp/'

# Working directory for the normative-modeling stage.
processing_dir = f'/home/{username}/Data/CamCAN/NM/'

# Scheduler settings forwarded to submit_jobs for every submitted job.
job_configs = dict(
    log_path=log_path,
    module='mne',
    time='1:00:00',
    memory='20GB',
    partition='normal',
    core=1,
    node=1,
    batch_file_name='batch_job',
)

# Make sure the log and processing folders exist before anything writes to them.
for required_dir in (log_path, processing_dir):
    os.makedirs(required_dir, exist_ok=True)


In [4]:
### Parallel feature extraction

# Upper bound on re-submission rounds. Without it, a job that fails every
# time (e.g. because of a broken input) would be re-submitted forever and the
# cell would never reach collect_results.
max_resubmissions = 5

# Every entry of data_path is treated as one subject to process.
subjects = os.listdir(data_path)

# Running Jobs
start_time = submit_jobs(mainParallel_path, target_dir, data_path, subjects,
                temp_path, job_configs=job_configs)
# Checking jobs
failed_job_names = check_jobs_status(username, start_time)

resubmission_round = 0
while len(failed_job_names) > 0:
    if resubmission_round >= max_resubmissions:
        # Stop with an explicit error instead of looping indefinitely.
        raise RuntimeError(
            f'{len(failed_job_names)} job(s) still failing after '
            f'{max_resubmissions} re-submission rounds: {list(failed_job_names)}'
        )
    resubmission_round += 1

    # Re-running Jobs (only the ones reported as failed)
    start_time = submit_jobs(mainParallel_path, target_dir, data_path, failed_job_names,
                temp_path, job_configs=job_configs)
    # Checking jobs
    failed_job_names = check_jobs_status(username, start_time)

# All jobs succeeded: merge the per-subject results into one file.
collect_results(target_dir, subjects, temp_path, file_name='camcan_features')

Submitted batch job 12560343
Submitted batch job 12560344
Submitted batch job 12560345
Submitted batch job 12560346
Submitted batch job 12560347
Submitted batch job 12560348
Submitted batch job 12560349
Submitted batch job 12560350
Submitted batch job 12560351
Submitted batch job 12560352
Submitted batch job 12560353
Submitted batch job 12560354
Submitted batch job 12560355
Submitted batch job 12560356
Submitted batch job 12560357
Submitted batch job 12560358
Submitted batch job 12560359
Submitted batch job 12560360
Submitted batch job 12560361
Submitted batch job 12560362
Submitted batch job 12560363
Submitted batch job 12560364
Submitted batch job 12560365
Submitted batch job 12560366
Submitted batch job 12560367
Submitted batch job 12560368
Submitted batch job 12560369
Submitted batch job 12560370
Submitted batch job 12560371
Submitted batch job 12560372
Submitted batch job 12560373
Submitted batch job 12560374
Submitted batch job 12560375
Submitted batch job 12560376
Submitted batc

KeyboardInterrupt: 

In [6]:
### Data preparation for Normative Modeling

# Covariates table and the features file from the feature-extraction stage.
camcan_cov_path = '/project/meganorm/Data/camcan/CamCAN/cc700/participants.tsv'
camcan_feature_path = f'/home/{username}/Data/CamCAN/Features/camcan_features.csv'

# Load features and covariates through the project's CamCAN loader.
camcan_data = load_camcan_data(camcan_feature_path, camcan_cov_path)

# Split the data for HBR inside processing_dir. The returned value is used
# later as the number of biomarkers to plot.
biomarker_num = hbr_data_split(
    camcan_data,
    processing_dir,
    drop_nans=True,
    batch_effects=['gender', 'site'],
)


In [None]:
### Setting up NM configs

# Interpreter handed to the NM runner for the jobs it launches.
python_path = '/project/meganorm/Software/Miniconda3/envs/mne/bin/python' 

# HBR model settings, keyed by configuration name (selected via `method`).
# Note that the boolean-like options are given as the string 'True'.
hbr_configs = {'hetero_SHASH_bspline':{'model_type':'bspline', 'likelihood':'SHASHb', 'linear_sigma':'True',
                                   'random_slope_mu':'True', 'linear_epsilon':'True', 'linear_delta':'True'},
            }

inscaler='minmax' 
outscaler='minmax' 
batch_size = 1
outputsuffix = '_estimate'

# Build file paths with os.path.join rather than string concatenation, so they
# stay valid even if processing_dir is configured without a trailing separator.
respfile = os.path.join(processing_dir, 'y_train.pkl')
covfile = os.path.join(processing_dir, 'x_train.pkl')

testrespfile_path = os.path.join(processing_dir, 'y_test.pkl')
testcovfile_path = os.path.join(processing_dir, 'x_test.pkl')

trbefile = os.path.join(processing_dir, 'b_train.pkl')
tsbefile = os.path.join(processing_dir, 'b_test.pkl')

# Per-job resource request and the scheduler type passed to the NM runner.
memory = '2gb'
duration = '2:00:00'
method = 'hetero_SHASH_bspline'
cluster_spec = 'slurm'

In [None]:
### Running NM

# `ptk` must be bound before the call below; no active import of pcntoolkit
# is present in the visible cells, so import it here to avoid a NameError on a
# fresh kernel.
import pcntoolkit as ptk

# Launch normative-model estimation ('NM' is the job name) using the HBR
# settings selected by `method`.
ptk.normative_parallel.execute_nm(processing_dir, python_path,
               'NM', covfile, respfile, batch_size, memory, duration, alg='hbr', 
               log_path=log_path, binary=True, testcovfile_path=testcovfile_path, 
               testrespfile_path=testrespfile_path,trbefile=trbefile, tsbefile=tsbefile, 
               model_type=hbr_configs[method]['model_type'], likelihood=hbr_configs[method]['likelihood'],  
               linear_sigma=hbr_configs[method]['linear_sigma'], random_slope_mu=hbr_configs[method]['random_slope_mu'],
               linear_epsilon=hbr_configs[method]['linear_epsilon'], linear_delta=hbr_configs[method]['linear_delta'], 
               savemodel='True', inscaler=inscaler, outscaler=outscaler, outputsuffix=outputsuffix, 
               interactive='auto', cluster_spec=cluster_spec)

In [None]:
### Plotting ranges
# Call the range plot once per biomarker index, 0 .. biomarker_num - 1.
for biomarker_idx in range(biomarker_num):
    plot_nm_range2(processing_dir, ind=biomarker_idx, parallel=True)