# Data Collection for System Under Test
This notebook collects $N$ independent generations from a target model (grok-3-mini via Azure) at a nominal temperature $T=0$ on the chosen prompt dataset, which must be the same one used for the reference models.

In [None]:
import json
import os

import tiktoken
from datasets import load_dataset
from openai import AzureOpenAI
from tqdm import tqdm

In [None]:
# Azure OpenAI connection settings.
# Values are read from environment variables so that secrets never have to be
# typed into (and accidentally committed with) the notebook. Any of them can
# still be set inline by replacing the expression with a string literal.
endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT", "")          # insert ENDPOINT
model_name = "grok-3-mini"
deployment = os.environ.get("AZURE_OPENAI_DEPLOYMENT", "")      # insert DEPLOYMENT NAME

subscription_key = os.environ.get("AZURE_OPENAI_API_KEY", "")   # insert SUBSCRIPTION KEY
api_version = os.environ.get("OPENAI_API_VERSION", "")          # insert API VERSION

# Fail fast with an explicit message: blank settings would otherwise only
# surface as an opaque request error once the collection loop has started.
_missing_settings = [
    setting
    for setting, value in {
        "endpoint": endpoint,
        "deployment": deployment,
        "subscription_key": subscription_key,
        "api_version": api_version,
    }.items()
    if not value
]
if _missing_settings:
    raise ValueError(
        "Missing Azure OpenAI configuration: " + ", ".join(_missing_settings)
        + ". Set the corresponding environment variables or fill the values in this cell."
    )

client = AzureOpenAI(
    api_version=api_version,
    azure_endpoint=endpoint,
    api_key=subscription_key,
)

# The choice of tokenizer is independent of the model under test: its only role is to turn
# the generated texts into token sequences on which the variability metrics are computed.
# Here, we use the 'o200k_base' encoding (standard for the GPT-4o family).
tokenizer = tiktoken.get_encoding('o200k_base')

In [None]:
# Prompt set: the first 30 questions of the 'validation' split of TruthfulQA ('generation' config).
# It must be the same prompt set as the one used for the reference models.
prompts = load_dataset(
    "truthfulqa/truthful_qa",
    'generation',
)['validation']['question'][:30]

# Number of generations collected per prompt; it may differ from the value used for the reference models.
N = 100

# Completion-token limit passed to the API for each generation; it must match the value used for the reference models.
tok_limit = 32

In [None]:
# Maps each prompt to the list of its N generations, each stored as a list of token ids.
answers_grok = {}

for q in tqdm(prompts):
    # Same system + user conversation is re-sent for every one of the N generations.
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant.",
        },
        {
            "role": "user",
            "content": q,
        },
    ]
    answs = []
    for _ in range(N):
        response = client.chat.completions.create(
            messages=messages,
            max_completion_tokens=tok_limit,
            model=deployment,
            temperature=0.0,
        )
        # The SDK types `message.content` as Optional: it can be None when the
        # model returns no visible text. Treat that as an empty generation
        # instead of crashing in `tokenizer.encode` and losing the whole run.
        text = response.choices[0].message.content or ""
        # `disallowed_special=()` makes tiktoken encode strings that look like
        # special tokens (e.g. "<|endoftext|>") as ordinary text; by default it
        # raises ValueError when the generated text contains one.
        answs.append(tokenizer.encode(text, disallowed_special=()))
    answers_grok[q] = answs

In [None]:
# Persist the collected token sequences (prompt -> list of token-id lists) as indented JSON.
with open("answers_grok.json", "w") as out_file:
    out_file.write(json.dumps(answers_grok, indent=4))