In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pcntoolkit as ptk

import os
import pickle
import pandas as pd
from pcntoolkit.util.hbr_utils import *
import scipy


In [2]:
# Resolve the two directories the rest of the notebook relies on:
#   curdir         -> the parent ("make_results") directory
#   processing_dir -> the sub-directory this notebook works in
# The cell is written so it can be re-run safely: the kernel's working directory
# changes after the first execution, so both situations must be handled.
curdir = os.getcwd()
processing_dirname = "more_flexible_models_lifespan_big"
if not curdir.endswith(processing_dirname):
    # First execution: we are still in the parent directory.
    # Create the processing directory if needed and move into it.
    processing_dir = os.path.join(curdir, processing_dirname)
    os.makedirs(processing_dir, exist_ok=True)
    os.chdir(processing_dir)
else:
    # Re-execution: the kernel is already inside the processing directory,
    # so the current directory *is* processing_dir and its parent is curdir.
    # (Previously processing_dir was read here before being assigned -> NameError.)
    processing_dir = curdir
    curdir = os.path.dirname(processing_dir)

print(f"{curdir=}")
print(f"{processing_dir=}")

curdir='/project_cephfs/3022017.02/projects/stijdboe/make_results'
processing_dir='/project_cephfs/3022017.02/projects/stijdboe/make_results/more_flexible_models_lifespan_big'


In [3]:
# Get the lifespan_big data
# All input files live in `datadir`; outputs (models, logs) go under `processing_dir`.
datadir = '/project_cephfs/3022017.02/projects/stijdboe/Data/UMAP_data/'
respfile = os.path.join(datadir, 'trainselect.txt')       # measurements  (eg cortical thickness) of the training samples (columns: the various features/ROIs, rows: observations or subjects)
covfile = os.path.join(datadir, 'cov_tr.txt')        # covariates (eg age) the training samples (columns: covariates, rows: observations or subjects)
testrespfile_path = os.path.join(datadir, 'testselect.txt')       # measurements  for the testing samples
testcovfile_path = os.path.join(datadir, 'cov_te.txt')        # covariate file for the testing samples
# befile = os.path.join(datadir, 'Z_train.pkl')      # training batch effects file (eg scanner_id, gender)  (columns: the various batch effects, rows: observations or subjects)
# testbefile = os.path.join(datadir, 'Z_test.pkl')      # testing batch effects file

# exist_ok=True makes directory creation idempotent (safe on re-run) and avoids the
# check-then-create race of `if not isdir: mkdir`.
output_path = os.path.join(processing_dir, 'Models/')    #  output path, where the models will be written
os.makedirs(output_path, exist_ok=True)

log_dir = os.path.join(processing_dir, 'log/')           #  log path, passed as log_path to the fitting call
os.makedirs(log_dir, exist_ok=True)


In [4]:
# Parameters shared by all models
# Note: boolean-like options are deliberately kept as the strings 'True'/'False'.

# --- algorithm / model structure ---
alg = 'hbr'
model_type = 'bspline'
linear_mu = linear_sigma = random_intercept_mu = 'True'
centered_intercept_mu = 'False'

# --- input / output scaling ---
inscaler_type = outscaler_type = 'standardize'

# --- sampler settings ---
sampler = 'NUTS'
n_mcmc_samples = n_tuning_samples = n_chains = n_cores = 1
target_accept = 0.99

# --- what gets written to disk ---
saveoutput = savemodel = binary = 'True'

In [5]:
# Load the training covariates from the text file at `covfile`.
# NOTE(review): the following cell repeats this exact load, so this cell is redundant.
X_train = np.loadtxt(covfile)


In [6]:
# Read the training covariates and responses (in that order) into arrays.
X_train, Y_train = (np.loadtxt(txt_path) for txt_path in (covfile, respfile))
# Path handed to the fitting routine as the response file.
trainselect = os.path.join(datadir, 'trainselect.txt')


In [7]:
# Read the test covariates and responses (in that order) into arrays.
X_test, Y_test = (np.loadtxt(txt_path) for txt_path in (testcovfile_path, testrespfile_path))
# Path to the test response file.
testselect = os.path.join(datadir, 'testselect.txt')


In [8]:
# Scaling applied to covariates (in) and responses (out).
inscaler_type = outscaler_type = 'standardize'

# One scaler per side; each is fitted on the training data only and then
# re-used, unchanged, on the test data.
inscaler = ptk.util.utils.scaler(inscaler_type)
outscaler = ptk.util.utils.scaler(outscaler_type)

# Tuple elements are evaluated left to right, so fit_transform runs before transform.
X_train_standardized, X_test_standardized = (
    inscaler.fit_transform(X_train),
    inscaler.transform(X_test),
)
# np.squeeze drops any length-1 axes from the scaled responses.
Y_train_standardized, Y_test_standardized = (
    np.squeeze(outscaler.fit_transform(Y_train)),
    np.squeeze(outscaler.transform(Y_test)),
)

In [9]:
# Model variants to fit and their per-variant settings (keyed by variant name).
model_names = ['SHASHb_1', 'SHASHb_2', 'Normal']
likelihood_map = dict(zip(model_names, ['SHASHb', 'SHASHb', 'Normal']))
durationmap = dict.fromkeys(['Normal', 'SHASHb'], '05:00:00')  # not used in this cell
epsilon_linear_map = dict(zip(model_names, ['False', 'True', 'False']))
delta_linear_map = dict(zip(model_names, ['False', 'True', 'False']))

# For each config: fit the normative model, then find and store the MAP.
for mn in model_names:
    likelihood, linear_epsilon, linear_delta = (
        likelihood_map[mn],
        epsilon_linear_map[mn],
        delta_linear_map[mn],
    )
    outputsuffix = f'UMAP_{mn}'

    # Everything passed to the fitting routine, collected in one place.
    fit_settings = dict(
        covfile=covfile,
        respfile=trainselect,
        log_path=log_dir,
        saveoutput=saveoutput,
        output_path=output_path,
        savemodel=savemodel,
        binary=binary,
        outputsuffix=outputsuffix,
        alg=alg,
        sampler=sampler,
        n_samples=n_mcmc_samples,
        n_tuning=n_tuning_samples,
        n_chains=n_chains,
        cores=n_cores,
        target_accept=target_accept,
        inscaler=inscaler_type,
        outscaler=outscaler_type,
        likelihood=likelihood,
        model_type=model_type,
        linear_mu=linear_mu,
        random_intercept_mu=random_intercept_mu,
        centered_intercept_mu=centered_intercept_mu,
        linear_sigma=linear_sigma,
        linear_epsilon=linear_epsilon,
        linear_delta=linear_delta,
    )
    nm = ptk.normative.fit(**fit_settings)

    # Find and store the MAP.
    # The third argument is an all-zero array shaped like the standardized responses
    # (presumably the batch-effects input -- confirm against find_map's signature).
    all_zeros = np.zeros_like(Y_train_standardized)
    MAP = nm.hbr.find_map(X_train_standardized, Y_train_standardized, all_zeros)
    map_path = os.path.join(processing_dir, f'MAP_{outputsuffix}.pkl')
    with open(map_path, 'wb') as file:
        pickle.dump(MAP, file)

Processing data in /project_cephfs/3022017.02/projects/stijdboe/Data/UMAP_data/trainselect.txt



KeyboardInterrupt



In [None]:
# Load one previously saved model from the Models directory.
# pickle.load executes code embedded in the file, so only open files this project wrote itself.
model_file = os.path.join(processing_dir, 'Models/NM_0_0_lifespanRight-Lateral-VentricleSHASHbTrueTrue.pkl')
with open(model_file, 'rb') as file:
    model = pickle.load(file)