In [None]:
"""
Quantum-inspired Hamiltonian Monte Carlo sampler for per-subject PK parameters.
HMC with a stochastic twist (a randomized mass, mimicking a quantum-annealing effect)
intended to help escape local minima and converge faster.

Saves posterior samples per subject: samples_ID{ID}.npy
"""

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import math
import os
import matplotlib.pyplot as plt

# -----------------------------
# Config
# -----------------------------
# Work in float64 throughout and seed both RNGs: the sampler draws from numpy
# (mass randomization) and from torch (initial perturbation, momentum, noise).
torch.set_default_dtype(torch.float64)
np.random.seed(1)
torch.manual_seed(1)

DATAFILE = "EstData.csv"
# Per-subject sample arrays and posterior-predictive plots are written here.
# NOTE(review): a later cell reads samples from "qhmc_samples" instead of this
# directory -- confirm which location is intended.
OUT_DIR = "sgnht_samples"
os.makedirs(OUT_DIR, exist_ok=True)

# Sampler hyperparameters (tuned moderately)
L = 5                      # number of inner integrator steps per outer iteration
eps = 0.01                 # integrator step size
num_iterations = 2000      # total outer sampler iterations per subject
burn_in = 500              # outer iterations skipped before samples are collected
thin = 5                   # keep every `thin`-th post-burn-in iteration
m0 = 10.0                  # nominal mass; the actual mass is redrawn around it every outer iteration
sigmam = 0.1               # sd of the random perturbation applied to log10(mass)
T = 1e-6                   # temperature: scales the injected momentum noise and the thermostat target
d = 3                      # dimension of theta (logKa, logCL, logV)

# Prior parameters (independent Gaussian priors on the log-parameters)
prior_mu = torch.tensor([ -1.0, 3.0, 0.0 ], dtype=torch.get_default_dtype())  # prior means for logKa, logCL, logV
prior_sigma = torch.tensor([1.0, 1.0, 1.0], dtype=torch.get_default_dtype())  # prior sds, same order

# Observation-error sd of the Gaussian likelihood; held fixed here, but it
# could be estimated instead.
obs_sigma = 1.0

# -----------------------------
# PK forward model (torch)
# one-compartment oral analytic solution (single dose)
# -----------------------------
def pk_one_compartment_conc_torch(logKa, logCL, logV, dose, times):
    """
    One-compartment oral-absorption model, single dose given at t = 0.

    Evaluates
        C(t) = dose * Ka / (V * (Ka - kel)) * (exp(-kel * t) - exp(-Ka * t)),
    with kel = CL / V.  That expression is 0/0 when Ka == kel, so in that
    (numerically) degenerate case the analytic limit
        C(t) = dose * Ka / V * t * exp(-Ka * t)
    is returned instead of NaN.

    inputs: logKa, logCL, logV: scalars (torch tensors)
            dose: scalar (float or tensor)
            times: 1D sequence, numpy array or torch tensor
    returns: concentrations at times (torch tensor, dtype/device of logKa)
    """
    Ka = torch.exp(logKa)
    CL = torch.exp(logCL)
    V = torch.exp(logV)

    # torch.tensor() on an existing tensor copy-constructs with a warning, so
    # tensors are converted with .to(); everything else keeps the original path.
    if torch.is_tensor(times):
        t = times.to(dtype=logKa.dtype, device=logKa.device)
    else:
        t = torch.tensor(times, dtype=logKa.dtype, device=logKa.device)

    kel = CL / V
    rate_gap = Ka - kel

    # Relative tolerance around sqrt(machine eps) for float64: below it the
    # difference of exponentials is dominated by cancellation error.
    degenerate = torch.abs(rate_gap) <= 1e-8 * torch.maximum(Ka, kel)

    # Replace the denominator by 1 on the degenerate branch so the unused
    # "general" value (and its gradient) stays finite inside torch.where.
    safe_gap = torch.where(degenerate, torch.ones_like(rate_gap), rate_gap)

    general = (dose * Ka / (V * safe_gap)) * (torch.exp(-kel * t) - torch.exp(-Ka * t))
    limit = (dose * Ka / V) * t * torch.exp(-Ka * t)
    return torch.where(degenerate, limit, general)

# -----------------------------
# log-posterior (unnormalized) and its gradient
# returns negative log posterior (U) for SGNHT
# -----------------------------
def U_and_grad(logtheta, dose, times_np, obs_np):
    """
    Evaluate the potential energy U (negative unnormalized log-posterior
    plus Gaussian normalizing constants) and its gradient w.r.t. logtheta.

    logtheta: torch tensor (logKa, logCL, logV) with requires_grad=True
    dose: dose amount; converted to float before use
    times_np, obs_np: numpy arrays of the same length (times, observations)
    returns: (U, grad) -- U as a detached scalar tensor, grad as a copy of
             logtheta.grad; logtheta.grad is zeroed before returning
    """
    log_ka, log_cl, log_v = logtheta[0], logtheta[1], logtheta[2]
    predicted = pk_one_compartment_conc_torch(log_ka, log_cl, log_v, float(dose), times_np)
    observed = torch.tensor(obs_np, dtype=logtheta.dtype, device=logtheta.device)

    # Gaussian likelihood: standardized residuals plus the normalizing term
    z_obs = (observed - predicted) / obs_sigma
    nll = 0.5 * torch.sum(z_obs ** 2) + 0.5 * len(observed) * math.log(2.0 * math.pi * (obs_sigma ** 2))

    # Independent Gaussian priors on the log-parameters
    z_prior = (logtheta - prior_mu) / prior_sigma
    nlp = 0.5 * torch.sum(z_prior ** 2) + 0.5 * torch.sum(torch.log(2.0 * math.pi * (prior_sigma**2)))

    U = nll + nlp

    # Backpropagate, hand back a private copy of the gradient, and reset the
    # accumulator so the next call starts from zero.
    U.backward()
    grad = logtheta.grad.clone()
    logtheta.grad.zero_()
    return U.detach(), grad

# -----------------------------
# SGNHT integrator (discrete-time updates)
# adapted from NN-style code to low-dimensional theta
# -----------------------------
def sgnht_sampler(times_np, obs_np, dose,
                  n_iter=num_iterations, eps=eps, L=L, m0=m0, sigmam=sigmam, burn_in=burn_in, thin=thin):
    """
    Runs the thermostat-based stochastic sampler for a single subject.

    Each outer iteration draws a fresh random mass and then performs L inner
    updates of momentum, position and the scalar thermostat variable.  No
    Metropolis-Hastings correction is applied.  The temperature `T`, the
    dimension `d` and the prior mean `prior_mu` are read from module scope.

    times_np, obs_np: numpy arrays of the same length (times, observations)
    dose: dose amount passed to the forward model
    n_iter: number of outer iterations
    eps: integrator step size
    L: inner steps per outer iteration
    m0, sigmam: nominal mass and sd of the log10-mass perturbation
    burn_in: outer iterations skipped before collecting samples
    thin: keep every `thin`-th iteration after burn-in
    returns: numpy array of shape (n_samples, n_params); (0, n_params) when
             nothing was collected (e.g. n_iter <= burn_in)
    """
    device = torch.device("cpu")

    # initialize position (log-params) near prior mean
    logtheta = prior_mu.clone().to(device).detach()
    logtheta += 0.1 * torch.randn_like(logtheta)   # small perturbation
    logtheta = logtheta.clone().requires_grad_(True)
    n_params = logtheta.numel()

    # initial momentum: standard normal draw
    q = torch.randn_like(logtheta)

    # thermostat variable zeta (scalar)
    zeta = 0.0

    samples = []

    # Progress is reported roughly five times per run.  n_iter // 5 is 0 for
    # n_iter < 5, which made the modulo below raise ZeroDivisionError.
    report_every = max(1, n_iter // 5)

    for it in range(n_iter):
        # redraw the mass around m0 on the log10 scale, floored at 1e-3
        m = float(10.0 ** (np.random.randn() * sigmam + math.log10(m0)))
        if m < 1e-3:
            m = 1e-3

        for _ in range(L):
            # --- momentum: q <- q - eps * grad_U - eps * zeta * q + sqrt(2*eps*T) * N(0, I)
            _, grad = U_and_grad(logtheta, dose, times_np, obs_np)
            noise = torch.randn_like(q) * math.sqrt(2.0 * eps * T)
            q = q - eps * grad - eps * zeta * q + noise

            # --- position: logtheta <- logtheta + eps * q / m
            # (in-place under no_grad so logtheta stays the same leaf tensor)
            with torch.no_grad():
                logtheta += (eps / m) * q

            # --- thermostat: zeta <- zeta + eps * (|q|^2 / m - d * T)
            p2 = float(torch.sum(q * q))
            zeta = zeta + eps * (p2 / m - d * T)

        # Collect sample after burn-in & thinning
        if it >= burn_in and ((it - burn_in) % thin == 0):
            samples.append(logtheta.detach().cpu().numpy().copy())

        # occasionally print progress
        if (it + 1) % report_every == 0:
            print(f"  Iter {it+1}/{n_iter}, zeta={zeta:.4e}, recent logtheta={logtheta.detach().cpu().numpy()}")

    # reshape keeps the documented 2-D shape even when no sample was collected
    return np.array(samples).reshape(-1, n_params)

# -----------------------------
# Driver: loop subjects, run sampler, save results, basic plots
# -----------------------------
def _pk_conc_numpy(logtheta, dose, times):
    """Numpy one-compartment oral model for a single (logKa, logCL, logV) draw."""
    ka = np.exp(logtheta[0])
    cl = np.exp(logtheta[1])
    v = np.exp(logtheta[2])
    # elimination rate is CL / V, matching the torch forward model
    kel = cl / v
    return (ka * dose / (v * (ka - kel))) * (np.exp(-kel * times) - np.exp(-ka * times))


def run_all_subjects(datafile=DATAFILE):
    """
    Run the sampler for every subject in `datafile` and write the results.

    For each unique ID the observation rows (EVID == 0) supply times and DV
    values, and the first positive AMT among the dosing rows (EVID == 1)
    supplies the dose (100.0 when there is none).  Samples are saved to
    OUT_DIR as samples_ID{ID}.npy, and a posterior-predictive plot is saved
    as ppred_ID{ID}.png when there are samples and observations to plot.

    datafile: path of a CSV with columns ID, EVID, TIME, DV, AMT, BW, COMED
    returns: None
    """
    df = pd.read_csv(datafile)
    subject_ids = np.unique(df['ID'].values)
    print(f"Found {len(subject_ids)} subjects")

    for sid in subject_ids:
        sub = df[df['ID'] == sid]
        # observation rows: EVID == 0 (observations)
        obs_rows = sub[sub['EVID'] == 0]
        times = obs_rows['TIME'].values.astype(float)
        obs = obs_rows['DV'].values.astype(float)

        # dose: first *positive* AMT among dosing events (EVID == 1).  The
        # previous code took the first AMT even when it was zero.
        dose_rows = sub[sub['EVID'] == 1]
        positive_amt = dose_rows.loc[dose_rows['AMT'] > 0, 'AMT']
        dose = float(positive_amt.iloc[0]) if len(positive_amt) > 0 else 100.0

        print(f"\nSubject {sid}: n_obs={len(times)}, dose={dose}, BW={sub['BW'].iloc[0]}, COMED={sub['COMED'].iloc[0]}")
        samples = sgnht_sampler(times, obs, dose, n_iter=num_iterations, eps=eps, L=L, m0=m0, sigmam=sigmam,
                                burn_in=burn_in, thin=thin)
        print(f"  Collected {samples.shape[0]} posterior samples for subject {sid}")
        # save
        np.save(os.path.join(OUT_DIR, f"samples_ID{sid}.npy"), samples)

        # quick posterior predictive plot (mean +/- 90% CI); needs at least
        # one sample and one observation time (times.min() fails on empty input)
        if samples.shape[0] > 0 and len(times) > 0:
            times_plot = np.linspace(times.min(), times.max(), 80)
            picked = np.random.choice(len(samples), size=min(200, len(samples)), replace=False)
            preds = np.array([_pk_conc_numpy(s, dose, times_plot) for s in samples[picked]])
            mean_pred = preds.mean(axis=0)
            lower = np.percentile(preds, 5, axis=0)
            upper = np.percentile(preds, 95, axis=0)

            fig = plt.figure(figsize=(6,4))
            plt.fill_between(times_plot, lower, upper, alpha=0.2, label='90% CI')
            plt.plot(times_plot, mean_pred, lw=2, label='Posterior mean')
            plt.scatter(times, obs, c='k', s=15, label='Observed')
            plt.xlabel("Time")
            plt.ylabel("Concentration")
            plt.title(f"Subject {sid} posterior predictive")
            plt.legend()
            plt.tight_layout()
            plt.savefig(os.path.join(OUT_DIR, f"ppred_ID{sid}.png"))
            # close explicitly so figures do not accumulate across subjects
            plt.close(fig)

    print("All subjects done.")

# Entry point: run the sampler for every subject in DATAFILE.  The guard is
# also true when this cell is executed in a notebook kernel, so running the
# cell starts the full pipeline.
if __name__ == "__main__":
    run_all_subjects()


Found 48 subjects

Subject 1: n_obs=25, dose=100.0, BW=58, COMED=0
  Iter 400/2000, zeta=7.7710e+01, recent logtheta=[-0.34167437 -6.2576085   1.67645514]
  Iter 800/2000, zeta=7.7917e+01, recent logtheta=[ 0.16772425 -6.55542687  1.71948879]
  Iter 1200/2000, zeta=7.7929e+01, recent logtheta=[ 0.29511203 -6.63491737  1.72900221]
  Iter 1600/2000, zeta=7.7931e+01, recent logtheta=[ 0.33802611 -6.66364315  1.73218421]
  Iter 2000/2000, zeta=7.7931e+01, recent logtheta=[ 0.35453438 -6.67506564  1.73341246]
  Collected 300 posterior samples for subject 1

Subject 2: n_obs=25, dose=100.0, BW=85, COMED=1
  Iter 400/2000, zeta=2.6760e+01, recent logtheta=[-3.2297155  -2.11791008  0.73680655]
  Iter 800/2000, zeta=2.8579e+01, recent logtheta=[-4.15825324 -2.36204213 -0.77516674]
  Iter 1200/2000, zeta=4.6334e+01, recent logtheta=[-7.15066339 -5.16280275 -3.20351563]
  Iter 1600/2000, zeta=4.7943e+01, recent logtheta=[-7.53009679 -5.44367529 -4.92909737]
  Iter 2000/2000, zeta=4.8263e+01, rece

In [3]:
"""
bridge_sgnht_to_csv.py

Aggregate subject-level posterior samples (from sgnht_sampler)
into population-level posterior + omegas for use in dose_finder.py.

Usage:
    python3 bridge_sgnht_to_csv.py
"""

import numpy as np
import pandas as pd
import glob

# -------------------------
# Settings
# -------------------------
INPUT_PATTERN = "sgnht_samples/samples_ID*.npy"   # where sgnht_sampler saved subject-level samples
OUTPUT_CSV    = "nlme_hmc_samples.csv"
N_KEEP        = 2000   # number of population-level rows to generate

# -------------------------
# Load all subject samples
# -------------------------
files = sorted(glob.glob(INPUT_PATTERN))
if not files:
    raise FileNotFoundError(f"No files found with pattern {INPUT_PATTERN}")

print(f"Found {len(files)} subject posterior files")

subject_samples = []
for f in files:
    arr = np.load(f)  # shape: (n_samples, n_params=3) for (logKa, logCL, logV)
    # fail early with the file name instead of an opaque np.stack error later
    if arr.ndim != 2 or arr.shape[0] == 0:
        raise ValueError(f"{f}: expected a non-empty (n_samples, n_params) array, got shape {arr.shape}")
    # discard the first 50% of each chain as additional burn-in
    # NOTE(review): the sampler already drops its own burn-in before saving --
    # confirm this second cut is wanted.
    n_burn = arr.shape[0] // 2
    subject_samples.append(arr[n_burn:])

# np.stack needs identical shapes; if subjects ended up with different
# numbers of draws, keep the last n_common draws of each (a no-op when all
# chains have the same length).
n_common = min(a.shape[0] for a in subject_samples)
subject_samples = np.stack([a[-n_common:] for a in subject_samples], axis=0)

# stacked as (n_subjects, n_samples, n_params)
n_subj, n_samp, n_param = subject_samples.shape
print(f"Shape: {subject_samples.shape} (subjects, samples, params)")

# -------------------------
# Compute population-level parameters
# -------------------------
rng = np.random.default_rng(42)

# dummy PD params (E0, Emax, EC50) and their omegas — placeholders, replace
# with real estimates if available.  They are the same for every row, so they
# are built once outside the loop.
pd_log_pops = {
    "logE0_pop": np.log(3.3),
    "logEmax_pop": np.log(10.0),
    "logEC50_pop": np.log(1.0),
}
pd_omegas = {"omega_3": 0.3, "omega_4": 0.3, "omega_5": 0.3}

records = []
for _ in range(N_KEEP):
    # pick one draw index and take that draw from every subject
    draw_idx = rng.integers(0, n_samp)
    subj_draws = subject_samples[:, draw_idx, :]  # shape (n_subj, 3)
    ka_draws = subj_draws[:, 0]
    cl_draws = subj_draws[:, 1]
    v_draws = subj_draws[:, 2]

    # population value = mean across subjects; omega = std dev across subjects.
    # Keys are inserted in the order the CSV columns must appear.
    row = {
        "logKa_pop": np.mean(ka_draws),
        "logCL_pop": np.mean(cl_draws),
        "logV_pop": np.mean(v_draws),
    }
    row.update(pd_log_pops)
    row["omega_0"] = np.std(cl_draws)
    row["omega_1"] = np.std(v_draws)
    row["omega_2"] = np.std(ka_draws)
    row.update(pd_omegas)
    records.append(row)

# -------------------------
# Save CSV
# -------------------------
df = pd.DataFrame.from_records(records)
df.to_csv(OUTPUT_CSV, index=False)
print(f"Saved {OUTPUT_CSV} with shape {df.shape}")


Found 48 subject posterior files
Shape: (48, 150, 3) (subjects, samples, params)
Saved nlme_hmc_samples.csv with shape (2000, 12)


In [None]:
import numpy as np
import pandas as pd
import os

# Directory where the per-subject sample files (samples_ID<id>.npy) are stored
# NOTE(review): the sampler cell in this notebook writes to "sgnht_samples" --
# confirm that "qhmc_samples" is the intended input.
SAMPLES_DIR = "qhmc_samples"
# NOTE(review): same file name as the CSV written by the aggregation cell
# above, but with different column names; running this cell overwrites it.
OUTFILE = "nlme_hmc_samples.csv"


def _subject_id(filename):
    """Return the integer subject ID encoded in a 'samples_ID<id>.npy' file name."""
    return int(filename.replace("samples_ID", "").replace(".npy", ""))


# Order files by numeric subject ID; a plain string sort would place
# samples_ID10.npy before samples_ID2.npy.
files = sorted(
    (f for f in os.listdir(SAMPLES_DIR) if f.startswith("samples_ID") and f.endswith(".npy")),
    key=_subject_id,
)
if not files:
    raise FileNotFoundError(f"No samples_ID*.npy files found in {SAMPLES_DIR}")

# placeholder E0/Emax/EC50 (example: assign fixed values or sample)
logE0_pop   = np.log(1.0)
logEmax_pop = np.log(2.0)
logEC50_pop = np.log(0.5)

# placeholder omegas (SDs of random effects)
omega_CL   = 0.3
omega_V    = 0.3
omega_Ka   = 0.3
omega_E0   = 0.3
omega_Emax = 0.3
omega_EC50 = 0.3

all_records = []

for f in files:
    samples = np.load(os.path.join(SAMPLES_DIR, f))  # shape (n_samples, 3) logKa, logCL, logV
    # an empty or 1-D array would otherwise give NaN means or an IndexError
    if samples.ndim != 2 or samples.shape[0] == 0 or samples.shape[1] < 3:
        raise ValueError(f"{f}: expected a non-empty (n_samples, 3) array, got shape {samples.shape}")

    # posterior means of this subject's samples (one CSV row per subject)
    logKa_pop = samples[:,0].mean()
    logCL_pop = samples[:,1].mean()
    logV_pop  = samples[:,2].mean()

    record = dict(
        log_cl_pop=logCL_pop,
        log_v_pop=logV_pop,
        log_ka_pop=logKa_pop,
        log_e0_pop=logE0_pop,
        log_emax_pop=logEmax_pop,
        log_ec50_pop=logEC50_pop,
        omega_0=omega_CL,
        omega_1=omega_V,
        omega_2=omega_Ka,
        omega_3=omega_E0,
        omega_4=omega_Emax,
        omega_5=omega_EC50
    )

    all_records.append(record)

# Convert to DataFrame and save
df_out = pd.DataFrame(all_records)
df_out.to_csv(OUTFILE, index=False)
print(f"Saved posterior CSV with {len(df_out)} rows to {OUTFILE}")


Saved posterior CSV with 48 rows to nlme_hmc_samples.csv
