In [1]:
import numpy as np
import pandas as pd
from covariance_target_informed_ridge import CovarianceTargetInformedModel

# ------------------------------------------
# 1. Toy "LLM Samples"
# ------------------------------------------
# Four hand-written draws standing in for repeated LLM responses; each draw
# supplies one number per feature.
samples = [
    {"bmi": 1.0, "height": 0.2,  "weight": 10},
    {"bmi": 1.1, "height": 0.21, "weight": 12},
    {"bmi": 0.9, "height": 0.19, "weight": 11},
    {"bmi": 1.2, "height": 0.25, "weight": 9},
]

# The first draw fixes the column order; every draw then becomes one row.
keys = list(samples[0])
M = np.array([[draw[name] for name in keys] for draw in samples])

# Column-wise mean over the draws.
mu = np.mean(M, axis=0)

# Sample covariance with columns as variables, plus a 1e-6 ridge on the
# diagonal so the matrix can be inverted even when a column barely varies.
Sigma = np.cov(M, rowvar=False) + 1e-6 * np.eye(len(mu))

# Precision matrix = inverse of the regularised covariance.
P = np.linalg.inv(Sigma)

for label, value in (
    ("FEATURES:", keys),
    ("MU:", mu),
    ("COVARIANCE:\n", Sigma),
    ("PRECISION:\n", P),
):
    print(label, value)


# ------------------------------------------
# 2. Make fake regression data
# ------------------------------------------
# Seed NumPy's legacy global generator so the draws below are repeatable.
np.random.seed(42)

# Design matrix: 200 standard-normal rows, one column per entry of mu.
X = np.random.standard_normal(size=(200, len(mu)))

# Ground-truth weights used to generate y (never passed to the model).
beta_true = np.array([1.0, 0.2, 5.0])

# Linear signal plus Gaussian noise scaled by 0.1.
noise = 0.1 * np.random.standard_normal(size=(200,))
y = X @ beta_true + noise

# ------------------------------------------
# 3. Fit MAP model
# ------------------------------------------
# The mean and precision computed from the toy samples are passed straight
# through as the model's `mu` and `P` arguments.
model = CovarianceTargetInformedModel(model_type="ridge", mu=mu, P=P)
model.fit(X, y, feature_names=keys)

# One line per feature: name padded to 10 characters, coefficient to 4 dp.
print("\nEstimated coefficients:")
for name, coef in zip(keys, model.coef_):
    print(f"{name:10s}  → {coef:.4f}")

print("Intercept:", model.intercept_)

# Scored on the same X, y the model was fitted on.
r2_on_fit_data = model.score(X, y)
print("R2 Score:", r2_on_fit_data)


FEATURES: ['bmi', 'height', 'weight']
MU: [ 1.05    0.2125 10.5   ]
COVARIANCE:
 [[ 1.66676667e-02  3.16666667e-03 -6.66666667e-02]
 [ 3.16666667e-03  6.92666667e-04 -2.16666667e-02]
 [-6.66666667e-02 -2.16666667e-02  1.66666767e+00]]
PRECISION:
 [[ 8.84991118e+02 -4.95250412e+03 -2.89828915e+01]
 [-4.95250412e+03  3.01478271e+04  1.93821472e+02]
 [-2.89828915e+01  1.93821472e+02  1.96036230e+00]]

Estimated coefficients:
bmi         → 1.0755
height      → 0.2515
weight      → 5.0380
Intercept: 0.002080095976861566
R2 Score: 0.9993228037417431


In [1]:
from llm_prior_elicitor import LLMPriorElicitor
from covariance_target_informed_ridge import CovarianceTargetInformedModel
import numpy as np

# ---------------------------------------------------
# 1. Choose LLM model
# ---------------------------------------------------
# The model is selected by name when the elicitor is constructed.
llm_name = "gpt-4o"
elicitor = LLMPriorElicitor(model_name=llm_name)

# ---------------------------------------------------
# 2. INSERT YOUR PROMPT HERE
# ---------------------------------------------------
# Passed below as `base_prompt`; it asks for a JSON object holding one numeric
# coefficient target per listed feature.
prompt = """
You are an API, not a conversational assistant.
Your only job is to produce pure JSON and nothing else.
Do NOT include explanations, greetings, summaries, bullet points, or any text outside JSON.

Given these feature names:
- bmi
- height
- weight

Return a JSON object with numeric coefficient targets for a regularised linear model.
Use floating-point numbers without units. Add some randomness so one value is far off.

Output must be EXACTLY and ONLY JSON format like this:

{
"targets": {
    "bmi": <number>,
    "height": <number>,
    "weight": <number>
}
}
"""

# ---------------------------------------------------
# 3. Collect LLM samples → compute μ, Σ, P
# ---------------------------------------------------
# sampling_llm hands back four values: the feature names plus the mean,
# covariance and precision it derived from the collected responses.
n_llm_calls = 10
llm_prior = elicitor.sampling_llm(base_prompt=prompt, num_calls=n_llm_calls)
keys, mu, Sigma, P = llm_prior

for label, value in (
    ("Feature order returned by LLM:", keys),
    ("Mu:", mu),
    ("Sigma:\n", Sigma),
    ("Precision:\n", P),
):
    print(label, value)


# ---------------------------------------------------
# 4. Create synthetic regression test data
# ---------------------------------------------------
# Seed NumPy's legacy global generator so X and the noise are repeatable.
np.random.seed(123)

# True coefficients are declared per feature name rather than by position,
# so they stay attached to the right column whatever order `keys` is in.
beta_by_feature = {"bmi": 0.8, "height": 0.3, "weight": -0.5}

# Fail early with a readable message if the LLM returned a feature that has
# no true coefficient here, instead of a shape error further down.
unknown_features = [name for name in keys if name not in beta_by_feature]
if unknown_features:
    raise KeyError(
        f"No true coefficient defined for feature(s): {unknown_features}"
    )

# IMPORTANT: Use the *same feature order* that the LLM returned
X = np.random.randn(300, len(keys))

# Assemble the coefficient vector column-by-column in `keys` order; for
# keys == ['bmi', 'height', 'weight'] this is exactly [0.8, 0.3, -0.5].
beta_true = np.array([beta_by_feature[name] for name in keys])
y = X @ beta_true + 0.1 * np.random.randn(300)

# ---------------------------------------------------
# 5. Fit MAP model with LLM-derived prior
# ---------------------------------------------------
# The LLM-derived mean and precision go in as the model's `mu` and `P`.
model = CovarianceTargetInformedModel(model_type="ridge", mu=mu, P=P)

# Column names are supplied in the order the elicitor returned them.
model.fit(X, y, feature_names=keys)

print("\n--- RESULTS ---")
print("Model coefficients:", model.coef_)
print("Intercept:", model.intercept_)

# Scored on the same data the model was fitted on, hence "Training" R2.
training_r2 = model.score(X, y)
print("Training R2:", training_r2)


[Elicitor] Using model: gpt-4o (provider=openai)

--- RAW LLM RESPONSE ---
```json
{
    "targets": {
        "bmi": 0.75,
        "height": 0.02,
        "weight": 5.89
    }
}
```
--- END RESPONSE ---

✔ Valid sample 1/10

--- RAW LLM RESPONSE ---
{
    "targets": {
        "bmi": 0.75,
        "height": 0.02,
        "weight": 15.0
    }
}
--- END RESPONSE ---

✔ Valid sample 2/10

--- RAW LLM RESPONSE ---
```json
{
    "targets": {
        "bmi": 0.75,
        "height": 0.02,
        "weight": 15.0
    }
}
```
--- END RESPONSE ---

✔ Valid sample 3/10

--- RAW LLM RESPONSE ---
```json
{
    "targets": {
        "bmi": 0.75,
        "height": 0.02,
        "weight": 15.5
    }
}
```
--- END RESPONSE ---

✔ Valid sample 4/10

--- RAW LLM RESPONSE ---
```json
{
    "targets": {
        "bmi": 0.75,
        "height": 0.02,
        "weight": 15.5
    }
}
```
--- END RESPONSE ---

✔ Valid sample 5/10

--- RAW LLM RESPONSE ---
{
    "targets": {
        "bmi": 0.75,
        "height": 0.02