In [38]:
import pandas as pd
import numpy as np
import time
import os
from tqdm import tqdm
from huggingface_hub import InferenceClient

In [39]:
# HuggingFace LLM Client Setup
# The access token is read from the environment so it is never stored in the
# notebook or its version history. Any token that was previously committed in
# this cell must be treated as leaked and revoked.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "Set the HF_TOKEN environment variable to a Hugging Face access token "
        "before running this notebook."
    )

client = InferenceClient(
    provider="together",
    api_key=hf_token,
)

# Model identifier sent with every chat-completion request.
LLAMA_MODEL = "meta-llama/Llama-3.3-70B-Instruct"

# Sampling parameters forwarded unchanged to client.chat.completions.create.
# NOTE(review): with temperature = 1 and no seed, repeated runs presumably
# yield different extractions for the same row; confirm this is intended.
temperature = 1
top_p = 1
max_tokens = 500
n = 1

In [40]:
# Long-format input table; the extraction loop reads subject_id, question,
# question_text, lured_answer, correct_answer and transcription_new from it.
data_long = pd.read_csv(filepath_or_buffer='../Data/data_long.csv')

In [41]:
# Prompt sent to the LLM for each row. It is filled with str.format using the
# fields question_text, lured_answer, correct_answer and transcription.
# The doubled braces ({{ and }}) in the output-format example are str.format
# escapes and become single literal braces in the final prompt.
prompt_template = """
You are an expert in the cognitive psychology of deliberation.

A participant answered the following reasoning problem:

QUESTION:
{question_text}

Intuitive (but incorrect) answer: {lured_answer}  
Correct answer: {correct_answer}

Here is the participant's think-aloud verbalization:

TRANSCRIPTION:
{transcription}

Your task is to identify **generic cognitive functions** or **reasoning strategies** that appear in the participant’s response, if any are identifiable.

Instructions:
- Focus on **underlying mental operations** involved in deliberate reasoning, not surface-level actions (e.g., Guessing, Repetition, Reading, Affirmation).
- Use **general and abstract** function names, ideally 1–2 words long.
- **Do not qualify or judge** the reasoning (e.g., correct/incorrect, good/poor, biased/unbiased).
- **Do not** refer to or restate the content of the question or answers.
- If no identifiable function is present, return: NA

### Output format:

[
  {{
    "operation": "<Short Name (1–2 words)>",
    "excerpt": "<Exact quote from the transcription>"
  }},
  ...
]

Return only the list above. If no operations apply, return: NA
"""

In [42]:
def get_deliberation_functions(row):
    """Ask the LLM which reasoning operations appear in one transcription.

    Parameters
    ----------
    row : mapping
        Must provide 'question_text', 'lured_answer', 'correct_answer' and
        'transcription_new'; these fill the placeholders of `prompt_template`.

    Returns
    -------
    str
        Content of the first returned choice, with surrounding whitespace
        stripped.

    Any exception raised by the client call propagates to the caller.
    """
    template_fields = {
        'question_text': row['question_text'],
        'lured_answer': row['lured_answer'],
        'correct_answer': row['correct_answer'],
        'transcription': row['transcription_new'],
    }
    filled_prompt = prompt_template.format(**template_fields)

    # Single-turn conversation: the whole prompt goes in one user message.
    chat_messages = [{"role": "user", "content": filled_prompt}]
    completion = client.chat.completions.create(
        model=LLAMA_MODEL,
        messages=chat_messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        n=n,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content.strip()

In [43]:
output_path = '../Output/llm_generated_deliberation_functions.csv'
os.makedirs('../Output', exist_ok=True)

# Retry / checkpoint policy for the extraction loop.
MAX_ATTEMPTS = 3          # total tries per row before giving up on it
RETRY_WAIT_SECONDS = 60   # pause between failed attempts (e.g. 503 from the provider)
SAVE_EVERY = 30           # write a checkpoint each time len(results) is a multiple of this

# --------------------------------------------------------------------
# Load Previous Results If Any
# --------------------------------------------------------------------
# Rows are identified by (subject_id, question); keys already present in the
# output file are skipped so an interrupted run can be resumed.
if os.path.exists(output_path):
    existing_results = pd.read_csv(output_path)
    processed_keys = set(zip(existing_results['subject_id'], existing_results['question']))
    print(f"Resuming from previous run. Already processed {len(existing_results)} rows.")
    results = existing_results.to_dict(orient='records')
else:
    processed_keys = set()
    results = []

# Keys of rows that still failed after MAX_ATTEMPTS tries in this run.
failed_keys = []

# --------------------------------------------------------------------
# LLM Extraction Loop
# --------------------------------------------------------------------
for idx, row in tqdm(data_long.iterrows(), total=len(data_long), desc="Extracting functions"):

    key = (row['subject_id'], row['question'])

    # Skip if already processed
    if key in processed_keys:
        continue

    # Retry the same row on failure instead of silently moving on to the
    # next one; otherwise a single transient error drops the row from the output.
    functions_text = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            functions_text = get_deliberation_functions(row)
            break
        except Exception as e:
            print(f"Error for idx {idx} (attempt {attempt}/{MAX_ATTEMPTS}): {e}")
            if attempt < MAX_ATTEMPTS:
                time.sleep(RETRY_WAIT_SECONDS)

    if functions_text is None:
        # Give up on this row for now; it is not written to the output, so a
        # later re-run of this cell will pick it up again.
        failed_keys.append(key)
        continue

    results.append({
        'subject_id': row['subject_id'],
        'question': row['question'],
        'question_text': row['question_text'],
        'lured_answer': row['lured_answer'],
        'correct_answer': row['correct_answer'],
        'transcription': row['transcription_new'],
        'functions_extracted': functions_text
    })

    if len(results) % SAVE_EVERY == 0:
        print(f"Saving progress after {len(results)} total rows...")
        pd.DataFrame(results).to_csv(output_path, index=False)

# --------------------------------------------------------------------
# Final Save
# --------------------------------------------------------------------
print("Final save...")
pd.DataFrame(results).to_csv(output_path, index=False)
print(f"All results saved to {output_path}")

if failed_keys:
    print(
        f"{len(failed_keys)} row(s) failed after {MAX_ATTEMPTS} attempts and were not saved; "
        f"re-run this cell to retry them: {failed_keys}"
    )

Extracting functions:   3%|▎         | 30/1020 [00:31<21:09,  1.28s/it]

Saving progress after 30 total rows...


Extracting functions:   6%|▌         | 60/1020 [01:03<14:05,  1.13it/s]

Saving progress after 60 total rows...


Extracting functions:   9%|▉         | 90/1020 [01:33<14:41,  1.06it/s]

Saving progress after 90 total rows...


Extracting functions:  12%|█▏        | 120/1020 [02:12<17:12,  1.15s/it]

Saving progress after 120 total rows...


Extracting functions:  15%|█▍        | 150/1020 [02:39<12:43,  1.14it/s]

Saving progress after 150 total rows...


Extracting functions:  18%|█▊        | 180/1020 [03:06<11:11,  1.25it/s]

Saving progress after 180 total rows...


Extracting functions:  21%|██        | 210/1020 [03:36<13:42,  1.02s/it]

Saving progress after 210 total rows...


Extracting functions:  24%|██▎       | 240/1020 [04:03<10:46,  1.21it/s]

Saving progress after 240 total rows...


Extracting functions:  26%|██▋       | 270/1020 [04:28<10:38,  1.18it/s]

Saving progress after 270 total rows...


Extracting functions:  29%|██▉       | 300/1020 [05:04<18:27,  1.54s/it]

Saving progress after 300 total rows...


Extracting functions:  32%|███▏      | 330/1020 [05:33<13:51,  1.21s/it]

Saving progress after 330 total rows...


Extracting functions:  35%|███▌      | 360/1020 [06:05<10:51,  1.01it/s]

Saving progress after 360 total rows...


Extracting functions:  38%|███▊      | 390/1020 [06:42<10:33,  1.01s/it]

Saving progress after 390 total rows...


Extracting functions:  41%|████      | 420/1020 [07:36<18:44,  1.87s/it]

Saving progress after 420 total rows...


Extracting functions:  44%|████▍     | 450/1020 [08:22<07:04,  1.34it/s]

Saving progress after 450 total rows...


Extracting functions:  47%|████▋     | 480/1020 [09:12<14:41,  1.63s/it]

Saving progress after 480 total rows...


Extracting functions:  50%|█████     | 510/1020 [10:18<32:48,  3.86s/it]

Saving progress after 510 total rows...


Extracting functions:  52%|█████▏    | 531/1020 [11:02<27:48,  3.41s/it]

Error for idx 531: 503 Server Error: Service Temporarily Unavailable for url: https://router.huggingface.co/together/v1/chat/completions


Extracting functions:  53%|█████▎    | 541/1020 [12:29<22:38,  2.84s/it]  

Saving progress after 540 total rows...


Extracting functions:  56%|█████▌    | 571/1020 [13:28<17:43,  2.37s/it]

Saving progress after 570 total rows...


Extracting functions:  59%|█████▉    | 601/1020 [14:22<10:47,  1.54s/it]

Saving progress after 600 total rows...


Extracting functions:  62%|██████▏   | 631/1020 [15:12<09:35,  1.48s/it]

Saving progress after 630 total rows...


Extracting functions:  65%|██████▍   | 661/1020 [15:52<07:57,  1.33s/it]

Saving progress after 660 total rows...


Extracting functions:  68%|██████▊   | 691/1020 [16:27<05:29,  1.00s/it]

Saving progress after 690 total rows...


Extracting functions:  71%|███████   | 721/1020 [17:03<07:10,  1.44s/it]

Saving progress after 720 total rows...


Extracting functions:  74%|███████▎  | 751/1020 [17:37<05:32,  1.24s/it]

Saving progress after 750 total rows...


Extracting functions:  77%|███████▋  | 781/1020 [18:08<05:52,  1.47s/it]

Saving progress after 780 total rows...


Extracting functions:  80%|███████▉  | 811/1020 [18:41<03:07,  1.12it/s]

Saving progress after 810 total rows...


Extracting functions:  82%|████████▏ | 841/1020 [19:22<03:15,  1.09s/it]

Saving progress after 840 total rows...


Extracting functions:  85%|████████▌ | 871/1020 [19:56<02:53,  1.16s/it]

Saving progress after 870 total rows...


Extracting functions:  88%|████████▊ | 901/1020 [20:24<01:27,  1.36it/s]

Saving progress after 900 total rows...


Extracting functions:  91%|█████████▏| 931/1020 [20:51<01:16,  1.16it/s]

Saving progress after 930 total rows...


Extracting functions:  94%|█████████▍| 961/1020 [21:19<01:06,  1.12s/it]

Saving progress after 960 total rows...


Extracting functions:  97%|█████████▋| 991/1020 [21:45<00:21,  1.33it/s]

Saving progress after 990 total rows...


Extracting functions: 100%|██████████| 1020/1020 [22:05<00:00,  1.30s/it]

Final save...
All results saved to ../Output/llm_generated_deliberation_functions.csv



