# Data Collection for System Under Test
This notebook collects $N$ independent generations per prompt from the target model (Claude Sonnet 4 via AWS Bedrock) at a nominal temperature $T=0$. The prompt dataset must be the same one used for the reference models.

In [None]:
import json
import time

import boto3
import tiktoken
from botocore.exceptions import ClientError
from datasets import load_dataset
from tqdm import tqdm


In [None]:
# AWS access: fill in the profile and region of your own subscription before running.
session = boto3.Session(
    profile_name="",
    region_name="",
)
client = session.client("bedrock-runtime")
credentials = session.get_credentials()  # credentials resolved for this session

# The tokenizer does not have to match the model under test: it is only used to turn
# every answer into a token sequence so that variability metrics can be computed across
# distributions of tokenized sequences.
# 'o200k_base' is the encoding used by the GPT-4o family.
tokenizer = tiktoken.get_encoding("o200k_base")

In [None]:
# Experiment configuration.
# Prompts: first 30 questions of the TruthfulQA 'generation' validation split.
# Must be the same prompts used for the reference models.
prompts = load_dataset("truthfulqa/truthful_qa", "generation")["validation"]["question"][:30]

# Number of generations collected per prompt.
# May differ from the value used for the reference models.
N = 100

# Generation length limit sent to the model.
# Must be the same value used for the reference models.
tok_limit = 32

In [None]:
# Collect N generations per prompt and store each answer as a list of o200k_base token ids.
# The run issues len(prompts) * N sequential requests, so transient service errors are
# retried instead of aborting the whole collection.

MODEL_ID = "eu.anthropic.claude-sonnet-4-20250514-v1:0"

# Bedrock error codes that indicate a transient condition worth retrying.
RETRYABLE_ERROR_CODES = {
    "ThrottlingException",
    "ServiceUnavailableException",
    "InternalServerException",
    "ModelTimeoutException",
    "ModelNotReadyException",
}
MAX_ATTEMPTS = 6        # total tries per request (1 initial + 5 retries)
MAX_BACKOFF_SECONDS = 60


def _invoke_with_retry(body):
    """Invoke the model once and return the parsed JSON response.

    Parameters
    ----------
    body : str
        JSON-encoded request payload for the Anthropic messages API on Bedrock.

    Returns
    -------
    dict
        The decoded response body.

    Raises
    ------
    botocore.exceptions.ClientError
        Immediately for non-transient errors (e.g. validation or access errors),
        or after MAX_ATTEMPTS tries if a transient error persists.
    """
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = client.invoke_model(
                modelId=MODEL_ID,
                contentType="application/json",
                accept="application/json",
                body=body,
            )
        except ClientError as err:
            code = err.response.get("Error", {}).get("Code", "")
            if code not in RETRYABLE_ERROR_CODES or attempt == MAX_ATTEMPTS:
                raise
            # Exponential backoff: 2, 4, 8, ... seconds, capped.
            time.sleep(min(2 ** attempt, MAX_BACKOFF_SECONDS))
        else:
            return json.loads(response["body"].read())


answers_claude = {}
for q in tqdm(prompts):
    # The payload depends only on the prompt, so it is built once and reused for all N calls.
    # NOTE(review): "max_tokens" is enforced by the service in the model's own token units;
    # the o200k_base sequences stored below are not truncated to tok_limit. Confirm this
    # matches how the reference-model answers were collected.
    body = json.dumps({
        "max_tokens": tok_limit,
        "temperature": 0.0,
        "messages": [{"role": "user", "content": q}],
        "anthropic_version": "bedrock-2023-05-31",
    })
    answs = []
    for _ in range(N):
        result = _invoke_with_retry(body)
        answs.append(tokenizer.encode(result["content"][0]["text"]))
    answers_claude[q] = answs

In [None]:
# Persist the prompt -> list-of-token-id-sequences mapping as indented JSON.
with open("answers_claudesonnet4.json", mode="w") as out_file:
    json.dump(answers_claude, out_file, indent=4)