In [1]:
# Install the notebook's dependencies into the environment of the running kernel.
# PCNtoolkit is installed from the archive of the v1.alpha tag on GitHub.
%pip install https://github.com/amarquand/PCNtoolkit/archive/refs/tags/v1.alpha.zip
# NOTE(review): graphviz and drawdata are not imported in the cells visible here;
# confirm they are still required. Note that drawdata is installed through conda
# while the other two packages are installed through pip.
%pip install graphviz
%conda install drawdata

Collecting https://github.com/amarquand/PCNtoolkit/archive/refs/tags/v1.alpha.zip
  Using cached https://github.com/amarquand/PCNtoolkit/archive/refs/tags/v1.alpha.zip
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Channels:
 - pytorch
 - conda-forge
 - defaults
Platform: osx-arm64
Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.


Note: you may need to restart the kernel to use updated packages.


In [2]:
# Import the PCNtoolkit components used in this notebook and set up the data directory.
import os

import pandas as pd
from pcntoolkit.dataio.norm_data import NormData
from pcntoolkit.normative_model.norm_conf import NormConf
from pcntoolkit.normative_model.norm_hbr import NormHBR
from pcntoolkit.regression_model.hbr.hbr_conf import HBRConf
from pcntoolkit.regression_model.hbr.likelihood import NormalLikelihood
from pcntoolkit.regression_model.hbr.prior import make_prior
from pcntoolkit.util.runner import Runner

# Folder (relative to the current working directory) that holds everything this notebook writes.
resources_dir = "resources"
# Absolute version of the resources folder; later cells build their paths from it.
abs_path = os.path.abspath(resources_dir)
# Location of the downloaded dataset; created on first run, left untouched if it already exists.
data_dir = os.path.join(abs_path, "data")
os.makedirs(data_dir, exist_ok=True)

In [3]:
# Download the fcon1000 dataset from GitHub only when no local copy exists yet.
# Later runs reuse the cached CSV; delete the file to force a fresh download.
FCON1000_URL = "https://raw.githubusercontent.com/predictive-clinical-neuroscience/PCNtoolkit-demo/refs/heads/main/data/fcon1000.csv"
fcon1000_path = os.path.join(data_dir, "fcon1000.csv")
if not os.path.exists(fcon1000_path):
    pd.read_csv(FCON1000_URL).to_csv(fcon1000_path, index=False)
data = pd.read_csv(fcon1000_path)

# Column roles, defined once and passed to NormData so the lists cannot drift apart.
covariates = ["age"]
batch_effects = ["sex", "site"]
response_vars = ["rh_MeanThickness_thickness", "WM-hypointensities"]
norm_data = NormData.from_dataframe(
    name="full",
    dataframe=data,
    covariates=covariates,
    batch_effects=batch_effects,
    response_vars=response_vars,
)

# Leave two sites out for doing transfer and extend later.
# The first returned dataset holds the selected sites, the second holds the rest.
transfer_sites = ["Milwaukee_b", "Oulu"]
transfer_data, fit_data = norm_data.split_batch_effects(
    {"site": transfer_sites}, names=("transfer", "fit")
)

# Split into train and test sets
# NOTE(review): no split ratio or seed is passed here, so the library defaults apply;
# confirm whether the split is reproducible across runs.
train, test = fit_data.train_test_split()
transfer_train, transfer_test = transfer_data.train_test_split()


In [4]:
# Configuration of the normative model: where results go and how the data are preprocessed.
save_dir = os.path.join(abs_path, "save_dir")
norm_conf = NormConf(
    # Persistence: keep both the fitted model and the results under save_dir.
    save_dir=save_dir,
    savemodel=True,
    saveresults=True,
    # Scaling applied to the inputs (inscaler) and the outputs (outscaler).
    inscaler="standardize",
    outscaler="standardize",
    # B-spline basis expansion, configured with order 3 and 5 knots.
    basis_function="bspline",
    basis_function_kwargs={"order": 3, "nknots": 5},
)

Process: 46638 - Configuration of normative model is valid.


In [5]:
# Prior for the mean of the likelihood: linear, with a fixed slope prior and a random intercept.
# The component priors are built one by one (in the same order as a nested call would
# evaluate them) so that each piece has a name.
mu_slope = make_prior(dist_params=(0.0, 10.0))
mu_intercept_sigma = make_prior(dist_name="HalfCauchy", dist_params=(0.5,))
mu_intercept_mu = make_prior(dist_name="Normal", dist_params=(0.0, 1.0))
mu_intercept = make_prior(
    random=True,
    sigma=mu_intercept_sigma,
    mu=mu_intercept_mu,
)
mu = make_prior(linear=True, slope=mu_slope, intercept=mu_intercept)

# Prior for the scale of the likelihood: same structure as the mean, but the random
# intercept's Normal prior uses parameters (1.0, 1.0) and the result is passed through
# a softplus mapping with parameters (0.0, 3.0).
sigma_slope = make_prior(dist_params=(0.0, 10.0))
sigma_intercept_sigma = make_prior(dist_name="HalfCauchy", dist_params=(0.5,))
sigma_intercept_mu = make_prior(dist_name="Normal", dist_params=(1.0, 1.0))
sigma_intercept = make_prior(
    random=True,
    sigma=sigma_intercept_sigma,
    mu=sigma_intercept_mu,
)
sigma = make_prior(
    linear=True,
    slope=sigma_slope,
    intercept=sigma_intercept,
    mapping="softplus",
    mapping_params=(0.0, 3.0),
)


# Build the regression configuration: a Normal likelihood parameterised by the
# mu and sigma priors, sampled with the nutpie NUTS sampler.
normal_likelihood = NormalLikelihood(mu, sigma)
hbr_conf = HBRConf(
    likelihood=normal_likelihood,
    nuts_sampler="nutpie",
    # NOTE(review): draws=10 / tune=5 are very small values, presumably chosen to keep
    # this example fast; confirm before using the results for anything but a smoke test.
    chains=4,
    draws=10,
    tune=5,
    pymc_cores=16,
)

# Combine the normative-model and regression configurations into one HBR model.
new_hbr_model = NormHBR(norm_conf=norm_conf, reg_conf=hbr_conf)

# Working area for the runner. It is anchored on abs_path, like data_dir and save_dir,
# so the log and temp locations do not depend on the current working directory.
sandbox_dir = os.path.join(abs_path, "hbr_runner_sandbox")
os.makedirs(sandbox_dir, exist_ok=True)

# Runner that fits the model in two parallel local jobs, without cross-validation.
runner = Runner(
    cross_validate=False,
    parallelize=True,
    time_limit="15:00:00",
    job_type="local",
    n_jobs=2,
    log_dir=os.path.join(sandbox_dir, "log_dir"),
    temp_dir=os.path.join(sandbox_dir, "temp_dir"),
)

# Launch fitting on the train split and prediction on the test split.
# NOTE(review): observe=False appears to return without waiting for the jobs to finish
# (a later cell reloads the runner state from temp_dir); confirm against the Runner docs.
runner.fit_predict(new_hbr_model, train, test, observe=False)

Process: 46638 - No python path specified. Using interpreter path of current process: /opt/anaconda3/envs/pcn_crash_course/bin/python
Process: 46638 - Saving runner state to /Users/stijndeboer/Projects/PCN/Crash course/resources/hbr_runner_sandbox/temp_dir/runner_state.json


In [7]:
# Reload the runner state saved in the sandbox's temp_dir.
# The path is derived from abs_path instead of a machine-specific absolute path,
# so the cell works on any machine and in any checkout location.
Runner.load(os.path.join(abs_path, "hbr_runner_sandbox", "temp_dir"))

Process: 46638 - Loading runner state from /Users/stijndeboer/Projects/PCN/Crash course/resources/hbr_runner_sandbox/temp_dir/runner_state.json
Process: 46638 - No python path specified. Using interpreter path of current process: /opt/anaconda3/envs/pcn_crash_course/bin/python
Process: 46638 - Runner loaded
--------------------------------------------
Active jobs: 2
Finished jobs: 0
Failed jobs: 0
--------------------------------------------


<pcntoolkit.util.runner.Runner at 0x1739fba70>

: 