In [1]:
import os
import numpy as np
import pandas as pd
import pickle
from matplotlib import pyplot as plt
import seaborn as sns
from scipy.stats import gaussian_kde
import pcntoolkit as ptk
from pcntoolkit.normative import estimate, predict, evaluate
from pcntoolkit.util.utils import compute_MSLL, create_design_matrix
from pcntoolkit.util.utils import calibration_descriptives
from pcntoolkit.model.hbr import bspline_fit, bspline_transform
from sklearn.preprocessing import OneHotEncoder

In [2]:
# Root directory under which every output of this analysis is written.
projdir = '/project_cephfs/3022017.02/projects/stijdboe/make_results/more_flexible_models_lifespan_big/'

# Directory for the plain-text matrices that are written by this notebook.
textfiles = os.path.join(projdir, 'textfiles')
# makedirs(exist_ok=True) also creates a missing projdir and is safe to re-run,
# unlike the check-then-mkdir pattern, which fails if the parent is absent.
os.makedirs(textfiles, exist_ok=True)

# Directory the covariate and response text files are read from.
data_dir = '/project_cephfs/3022017.02/projects/stijdboe/Data/UMAP_data'


### Fit the models

Now we fit the models. This involves looping over the IDPs we have selected. We use PCNtoolkit's `create_design_matrix` utility to set up the design matrices, which contain the covariates, fixed effects for site, and a nonlinear basis expansion.

In [5]:
# Name of the warping function; forwarded to estimate() through `warp=`.
warp = 'WarpSinArcsinh'

# Everything produced by this run is kept under <projdir>/blr_UMAP/.
this_identifier = "blr_UMAP"
processing_dir = os.path.join(projdir, this_identifier + '/')
log_dir = os.path.join(processing_dir, 'log')

# log_dir sits inside processing_dir, so one makedirs call creates both
# (plus any missing parents); exist_ok keeps the cell re-runnable.
os.makedirs(log_dir, exist_ok=True)

# Covariate (X) and response (Y) text files for the train and test splits.
X_tr_path = os.path.join(data_dir, 'cov_tr.txt')
Y_tr_path = os.path.join(data_dir, 'trainselect.txt')

X_te_path = os.path.join(data_dir, 'cov_te.txt')
Y_te_path = os.path.join(data_dir, 'testselect.txt')

X_tr = np.loadtxt(X_tr_path)
X_te = np.loadtxt(X_te_path)
Y_tr = np.loadtxt(Y_tr_path)
Y_te = np.loadtxt(Y_te_path)

# Fail fast on misaligned inputs, before any file is written or a fit is
# started: each covariate row must pair with exactly one response row.
assert X_tr.shape[0] == Y_tr.shape[0], (
    f"train covariates have {X_tr.shape[0]} rows but responses have {Y_tr.shape[0]}"
)
assert X_te.shape[0] == Y_te.shape[0], (
    f"test covariates have {X_te.shape[0]} rows but responses have {Y_te.shape[0]}"
)

# Expand the covariates into design matrices with a B-spline basis. Both
# splits go through the same call with the same arguments.
# NOTE(review): no site_ids/all_sites are passed here -- confirm whether the
# site fixed effects mentioned in the text above are intended for this model.
Phi_tr = create_design_matrix(X_tr, basis='bspline')
Phi_te = create_design_matrix(X_te, basis='bspline')

# estimate() is given file paths rather than arrays, so the design matrices
# are written out as text files.
Phi_tr_path = os.path.join(textfiles, 'Phi_tr.txt')
Phi_te_path = os.path.join(textfiles, 'Phi_te.txt')
np.savetxt(Phi_tr_path, Phi_tr)
np.savetxt(Phi_te_path, Phi_te)


# Work from the processing directory.
# NOTE(review): presumably estimate() writes its outputs relative to the
# current working directory -- confirm against the PCNtoolkit docs.
os.chdir(processing_dir)

# Standardise the responses with statistics from the TRAINING split only;
# the test split is scaled with the same mean/std so that no test-set
# information enters the scaling.
F_tr_mean = Y_tr.mean(axis=0)
F_tr_std = Y_tr.std(axis=0)

# A constant training feature has zero std and would silently turn into
# NaN/inf after the division; stop here with a clear message instead.
if np.any(F_tr_std == 0):
    raise ValueError(
        "At least one response feature has zero standard deviation in the "
        "training data and cannot be standardised."
    )

F_tr = (Y_tr - F_tr_mean) / F_tr_std
F_te = (Y_te - F_tr_mean) / F_tr_std

# Write the standardised responses as text files for estimate().
Y_feature_tr_path = os.path.join(textfiles, 'Y_UMAP_tr.txt')
Y_feature_te_path = os.path.join(textfiles, 'Y_UMAP_te.txt')
np.savetxt(Y_feature_tr_path, F_tr)
np.savetxt(Y_feature_te_path, F_te)

# Fit the BLR model on the training design matrix / responses and evaluate it
# on the test split. All inputs are passed as paths to the text files written
# earlier in this cell.
estimate(
    Phi_tr_path,                  # training design matrix
    Y_feature_tr_path,            # standardised training responses
    testcov=Phi_te_path,          # test design matrix
    testresp=Y_feature_te_path,   # standardised test responses
    alg='blr',
    optimizer='powell',
    savemodel=True,               # keep the fitted model on disk
    warp=warp,                    # warping function chosen at the top of the cell
    warp_reparam=True,
)



Processing data in /project_cephfs/3022017.02/projects/stijdboe/make_results/more_flexible_models_lifespan_big/textfiles/Y_UMAP_tr.txt
Estimating model  1 of 1
configuring BLR ( order 1 )
Using default hyperparameters
Optimization terminated successfully.
         Current function value: 15322.752023
         Iterations: 5
         Function evaluations: 249
Saving model meta-data...
Evaluating the model ...
Writing outputs ...
