In [1]:
import sys
from pathlib import Path

# Put the parent of the current working directory at the front of sys.path so
# that the `src` package imported below can be resolved.
# NOTE(review): this presumes the notebook is started from a directory that sits
# directly under the repository root — confirm before running from elsewhere.
repo_root = Path.cwd().parent
sys.path.insert(0, str(repo_root))
# This import has to stay below the sys.path tweak above.
from src.pika.probe.length_probe import LengthProbe

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the probe with its default constructor arguments and call setup() on it
# before any training or prediction happens in the cells below.
# NOTE(review): what setup() initialises is not visible from this notebook — see
# the LengthProbe implementation.
probe = LengthProbe()
# Kept as the last expression so the cell displays whatever setup() returns.
probe.setup()

<src.pika.probe.length_probe.LengthProbe at 0x7ea8fff682c0>

In [3]:
import random

# Fix the stdlib RNG state up front. Nothing in this cell draws from it.
# NOTE(review): presumably kept so that any randomness inside the probe is
# repeatable — confirm against the LengthProbe implementation.
random.seed(42)

# Hand-written toy data: token counts with some noise, paired with targets that
# fall as the counts rise (shorter prompts → higher success rate).
train_token_counts = [150, 300, 450, 600, 750, 900, 180, 320, 480, 620]
train_targets = [0.75, 0.45, 0.30, 0.20, 0.15, 0.12, 0.78, 0.42, 0.28, 0.18]

val_token_counts = [160, 310, 460, 610, 760]
val_targets = [0.72, 0.44, 0.31, 0.19, 0.14]

# Placeholder prompts (not used by the probe when total_input_tokens is provided).
# One placeholder per token count; the validation names repeat prompt_0..prompt_4.
train_prompts = [f"prompt_{i}" for i in range(len(train_token_counts))]
val_prompts = [f"prompt_{i}" for i in range(len(val_token_counts))]

# Both splits share one layout: (indices, prompts, targets, total_input_tokens).
train_data = (
    list(range(len(train_token_counts))),
    train_prompts,
    train_targets,
    train_token_counts,
)
val_data = (
    list(range(len(val_token_counts))),
    val_prompts,
    val_targets,
    val_token_counts,
)

# Left as the final expression so the cell still displays train()'s return value.
probe.train(train_data=train_data, val_data=val_data)


LengthProbe Training
Task type: regression
Train samples: 10
Val samples: 5

Grid search over alpha values: [0.001, 0.01, 0.1, 1, 10, 100, 1000]
  Alpha:    0.001 | Val spearman: 1.0000
  Alpha:    0.010 | Val spearman: 1.0000
  Alpha:    0.100 | Val spearman: 1.0000
  Alpha:    1.000 | Val spearman: 1.0000
  Alpha:   10.000 | Val spearman: 1.0000
  Alpha:  100.000 | Val spearman: 1.0000
  Alpha: 1000.000 | Val spearman: 1.0000

Best alpha: 0.001
Best val spearman: 1.0000

Retraining on full train+val set with best alpha...
✓ Model trained on 15 samples


<src.pika.probe.length_probe.LengthProbe at 0x7ea8fff682c0>

In [4]:
# Inference-time call: the answer is not known yet, so the tuple carries no
# targets — only (indices, prompts, total_input_tokens).
test_token_counts = [200, 400, 550, 700, 850]
test_prompts = [f"test_prompt_{i}" for i in range(len(test_token_counts))]

# With the targets slot dropped, total_input_tokens is data[2] rather than data[3].
inference_data = (
    list(range(len(test_token_counts))),
    test_prompts,  # placeholders
    test_token_counts,
)

indices, predictions = probe.predict(inference_data)
print("Indices:    ", indices.tolist())
# Predictions are shown as strings rounded to four decimal places.
print("Predictions:", [format(value, ".4f") for value in predictions.tolist()])

Indices:     [0, 1, 2, 3, 4]
Predictions: ['0.6057', '0.4252', '0.2898', '0.1544', '0.0191']
