# LLM Reference Sentence Generation

In [36]:
# --------------------------------------------------------------------
# 1. Import necessary libraries
# --------------------------------------------------------------------
import pandas as pd
import os
import time
from huggingface_hub import InferenceClient

In [None]:
# --------------------------------------------------------------------
# 2. Setup
# --------------------------------------------------------------------
# Read the API key from the environment instead of storing it in the notebook,
# so the secret never ends up in version control or in a shared copy.
# NOTE(review): when HF_TOKEN is unset, None is passed and the client is
# presumably left to its own default token resolution - confirm against the
# huggingface_hub documentation.
client = InferenceClient(
    provider="together",
    api_key=os.environ.get("HF_TOKEN"),
)

LLAMA_MODEL = "meta-llama/Llama-3.3-70B-Instruct"

# Sampling settings forwarded unchanged to every chat-completion request.
temperature = 1
top_p = 1
max_tokens = 500
n = 1


In [38]:
# --------------------------------------------------------------------
# 3. Load data_long and keep the first 10 questions of subject 1
# --------------------------------------------------------------------
data_long = pd.read_csv('../Data/data_long.csv', encoding='utf-8-sig')

# Keep the rows of subject 1, ordered by question, and take the first 10.
# The missing-value fill is done with .assign inside the same chain so that
# questions_data is a fresh frame; assigning a column on the filtered/sliced
# result afterwards is the pattern that triggers SettingWithCopyWarning.
questions_data = (
    data_long.loc[data_long['subject_id'] == 1]
    .sort_values('question')
    .head(10)
    .assign(correct_answer=lambda frame: frame['correct_answer'].fillna('None'))
)

In [39]:
# --------------------------------------------------------------------
# 4. Define Prompt Templates (Dictionary) - Clean Functional Prompts
# --------------------------------------------------------------------

# Shared skeleton for all prompts. The doubled braces keep `{question_text}` as
# a literal placeholder that is filled in later, per question; the single-brace
# fields are filled in just below, once per reasoning function.
_PROMPT_SKELETON = """You are simulating a participant thinking aloud while solving a reasoning problem.

Here is the reasoning problem the participant is facing:

{{question_text}}

Your task is to generate 5 short example sentences that illustrate RESPONSE {label}.

Response {name} refers to situations where the participant is {definition}.

CRITICAL INSTRUCTION:
Do NOT mention or refer to any specific content, numbers, names, or answers from the problem.
Focus ONLY on the generic cognitive process of {focus}.

These sentences should sound like a real person {voice}.

Generate exactly 5 sentences. Number them.

Be concise and natural.

Do not include any other text besides the 5 numbered sentences."""

# Wording that differs between functions, as
# (what the participant is doing, process to focus on, how it should sound).
_FUNCTION_WORDING = {
    'control': (
        "inhibiting, rejecting, or resisting their first intuitive response",
        "inhibiting or resisting an initial answer impulse",
        "trying to stop themselves from blurting out the first thing that came to mind",
    ),
    'generation': (
        "actively searching for new possible answers, alternatives, or hypotheses",
        "exploring options, generating alternatives, or mentally simulating scenarios",
        "searching for possible answers, exploring ideas, or trying to figure things out",
    ),
    'justification': (
        "providing explicit reasons, arguments, or explanations to support the response they are currently considering",
        "explaining, defending, or rationalizing an answer (whatever that answer may be)",
        "giving reasons for why they believe their answer might be correct",
    ),
    'regulation': (
        "monitoring or reflecting on their own thinking process",
        "reflecting on difficulty, uncertainty, effort allocation, or decisions about how to proceed",
        "thinking about their thinking, expressing hesitation, confidence, effort, or meta-reasoning",
    ),
}

# One ready-to-format template per function, keyed by function name and kept in
# the order control, generation, justification, regulation.
prompt_templates = {
    name: _PROMPT_SKELETON.format(
        label=name.upper(),
        name=name,
        definition=definition,
        focus=focus,
        voice=voice,
    )
    for name, (definition, focus, voice) in _FUNCTION_WORDING.items()
}

In [40]:
# --------------------------------------------------------------------
# 5. Generate Prototypes Function
# --------------------------------------------------------------------
def generate_prototypes(question_row, function_type):
    """Request example think-aloud sentences for one question and one function.

    Parameters
    ----------
    question_row : mapping-like
        Row describing the question (e.g. a row yielded by
        ``DataFrame.iterrows()``). It must provide every field named in the
        selected template; the templates defined in this notebook only use
        ``question_text``.
    function_type : str
        Key into ``prompt_templates`` selecting which prompt to send.

    Returns
    -------
    The message content of the first choice in the completion response.
    Only the first choice is returned even if ``n`` is greater than 1.

    Raises
    ------
    KeyError
        If ``function_type`` is not a key of ``prompt_templates`` or the
        template names a field that ``question_row`` does not contain.
    """
    # format_map looks up only the fields the template actually references.
    # Columns such as lured_answer / correct_answer therefore stay available
    # to templates that use them, without being required when none does.
    prompt = prompt_templates[function_type].format_map(question_row)

    messages = [
        {"role": "system", "content": "You are simulating a participant thinking aloud in a reasoning task."},
        {"role": "user", "content": prompt}
    ]

    response = client.chat.completions.create(
        model=LLAMA_MODEL,
        messages=messages,
        max_tokens=max_tokens,  # Adjust based on response length
        temperature=temperature,
        top_p=top_p,
        n=n
    )

    return response.choices[0].message.content

In [41]:
# --------------------------------------------------------------------
# 6. Main Loop with Error Handling
# --------------------------------------------------------------------
MAX_ATTEMPTS = 4           # total tries per (question, function) pair
RETRY_WAIT_SECONDS = 60    # pause after a failed request before trying again
REQUEST_PAUSE_SECONDS = 2  # pause after a successful request (be gentle with the API)

results = []
failed_requests = []  # (question, function) pairs that never succeeded

for _, row in questions_data.iterrows():
    for function in ['control', 'generation', 'justification', 'regulation']:
        print(f"Generating {function.upper()} prototypes for Question {row['question']}...")

        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                prototypes = generate_prototypes(row, function)
            except Exception as e:
                if attempt < MAX_ATTEMPTS:
                    print(f"Error encountered: {e}. Waiting {RETRY_WAIT_SECONDS} seconds before retrying...")
                    time.sleep(RETRY_WAIT_SECONDS)
                else:
                    # No retry follows the last attempt, so do not wait again;
                    # record the failure so it is not lost silently.
                    print(f"Error encountered: {e}. Giving up after {MAX_ATTEMPTS} attempts.")
                    failed_requests.append((row['question'], function))
            else:
                results.append({
                    'subject_id': row['subject_id'],
                    'question': row['question'],
                    'function': function,
                    'prototypes': prototypes
                })
                time.sleep(REQUEST_PAUSE_SECONDS)
                break  # Exit the retry loop

if failed_requests:
    print(f"WARNING: {len(failed_requests)} request(s) produced no prototypes: {failed_requests}")

Generating CONTROL prototypes for Question 1...
Generating GENERATION prototypes for Question 1...
Generating JUSTIFICATION prototypes for Question 1...
Generating REGULATION prototypes for Question 1...
Generating CONTROL prototypes for Question 2...
Generating GENERATION prototypes for Question 2...
Generating JUSTIFICATION prototypes for Question 2...
Generating REGULATION prototypes for Question 2...
Generating CONTROL prototypes for Question 3...
Generating GENERATION prototypes for Question 3...
Generating JUSTIFICATION prototypes for Question 3...
Generating REGULATION prototypes for Question 3...
Generating CONTROL prototypes for Question 4...
Generating GENERATION prototypes for Question 4...
Generating JUSTIFICATION prototypes for Question 4...
Generating REGULATION prototypes for Question 4...
Generating CONTROL prototypes for Question 5...
Generating GENERATION prototypes for Question 5...
Generating JUSTIFICATION prototypes for Question 5...
Generating REGULATION prototype

In [42]:
# --------------------------------------------------------------------
# 7. Save Results
# --------------------------------------------------------------------
# Turn the collected records into one table and write it to disk as CSV.
_prototypes_csv = '../Data/prototypes_llama3.3.csv'
results_df = pd.DataFrame(results)
results_df.to_csv(_prototypes_csv, encoding='utf-8-sig', index=False)
print(f"Saved generated prototypes to {_prototypes_csv}")

Saved generated prototypes to ../Data/prototypes_llama3.3.csv
