# Imports

In [1]:
# TODO Run all models on both cue sets: the 100 random cues and the 100 cues with the highest R1 strength

# TODO Interpretation: error rate; compare top responses (human vs. AI); compare top-R1 cues vs. random cues; compare the mean over all AI models vs. each AI model on its own

In [2]:
import csv
import ollama
import os
from tqdm import tqdm

# Config

In [3]:
# SWOW-style prompt template.
# The "{cue}" placeholder is filled per cue via PROMPT_TEMPLATE.format(cue=...).
# The model is instructed to reply with a single line "cue;A1;A2;A3", which
# run_experiment later splits on ';' and expects to yield exactly four fields.
PROMPT_TEMPLATE = """<<SYS>>
You MUST follow these rules:

1. Do NOT output reasoning, chain-of-thought, thinking process, analysis,
   hidden thoughts, XML tags like <think>, or any extra formatting.
2. Output ONLY one single line with exactly four semicolon-separated fields.
3. Format: cue;A1;A2;A3
4. A1-A3 MUST be exactly one word each (no spaces).
5. If you cannot generate A2 or A3, use exactly: No more responses
6. Any extra text makes the output INVALID.

<</SYS>>

You will perform a word association task.

Task:
Given a cue word, produce up to three single-word associations:
A1 = strongest association
A2 = second association
A3 = third association

Output format (MANDATORY):
cue;A1;A2;A3

Cue:
{cue}
"""

# Functions

In [4]:
# ----------------------------------------------------------
# FUNCTION TO QUERY OLLAMA
# ----------------------------------------------------------
def ask_ollama(model: str, prompt: str) -> str:
    """Send ``prompt`` to the Ollama model ``model`` and return the 'response' field of the reply."""
    return ollama.generate(model=model, prompt=prompt)["response"]

# ----------------------------------------------------------
# LOAD INPUT WORDS
# ----------------------------------------------------------
def load_cue_words(path: str):
    """Read the cue words from the first column of a CSV file.

    The first row is treated as a header and skipped. Blank lines and rows
    whose first cell is empty are ignored, so a trailing newline or a stray
    empty row in the file no longer raises ``IndexError``.

    Args:
        path: Path to a UTF-8 encoded CSV file with a header row.

    Returns:
        list[str]: The first-column values in file order. Empty if the file
        has no data rows (or is completely empty).
    """
    with open(path, newline="", encoding="utf-8") as f:
        reader = csv.reader(f)
        # The default makes an empty file yield [] instead of raising StopIteration.
        next(reader, None)
        # csv.reader yields [] for blank lines; skip those and blank cues.
        return [row[0] for row in reader if row and row[0].strip()]

# ----------------------------------------------------------
# SAVE OUTPUT
# ----------------------------------------------------------
def save_results(path: str, rows):
    """Write association results to a CSV file, creating its folder if needed.

    Args:
        path: Destination CSV path. Missing parent directories are created so
            that a finished run is not lost just because the output folder
            does not exist yet.
        rows: Iterable of ``[model, cue, A1, A2, A3]`` rows.
    """
    parent_dir = os.path.dirname(path)
    if parent_dir:  # empty when ``path`` is a bare file name in the working directory
        os.makedirs(parent_dir, exist_ok=True)

    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["model", "cue", "A1", "A2", "A3"])
        writer.writerows(rows)

# Pipeline

In [5]:
def run_experiment(cues, output, model):
    """Query one model for every cue and save the parsed associations as CSV.

    Each cue is inserted into ``PROMPT_TEMPLATE`` and sent to the model. The
    reply is split on ';' and must yield exactly four fields
    (``cue;A1;A2;A3``). Replies in any other format, and requests that fail,
    are kept in the output as a row of "Invalid Response" placeholders rather
    than being skipped, so every cue appears exactly once in the result file.

    Args:
        cues: Iterable of cue words.
        output: Path of the CSV file to write (see ``save_results``).
        model: Ollama model tag, e.g. ``"qwen3:8b"``.
    """
    invalid = "Invalid Response"
    results = []
    for cue in tqdm(cues):
        prompt = PROMPT_TEMPLATE.format(cue=cue)

        # Results are only written after the loop, so a single failed request
        # must not abort the run and discard everything collected so far.
        try:
            response = ask_ollama(model, prompt)
        except Exception as exc:
            print(f"Warning: Request failed for '{cue}': {exc}")
            response = None

        if response is None:
            results.append([model, cue, invalid, invalid, invalid])
            continue

        # Now split by semicolon
        parts = [p.strip() for p in response.split(";")]

        if len(parts) != 4:
            print(f"Warning: Unexpected format for '{cue}': {response}")
            results.append([model, cue, invalid, invalid, invalid])
            continue

        # The echoed cue (first field) is ignored; the original cue is stored instead.
        _, a1, a2, a3 = parts
        results.append([model, cue, a1, a2, a3])

    save_results(output, results)
    print(f"Done! Saved to {output}")

In [6]:
# Ollama model tags to run, grouped by model family.
models = [
    # Qwen3
    "qwen3:0.6b",
    "qwen3:1.7b",
    "qwen3:8b",
    "qwen3:14b",
    "qwen3:30b",
    # Gemma 3
    "gemma3:270m",
    "gemma3:1b",
    "gemma3:4b",
    "gemma3:12b",
    "gemma3:27b",
]

In [7]:
# Experiment on the cue set stored in top_100_cues_by_R1.csv
input_path = os.path.join("..", "data", "datasets", "top_100_cues_by_R1.csv")
cues = load_cue_words(input_path)

for model in models:
    # Model tags such as "qwen3:0.6b" contain ':' and '.'; both become '_' in the file name.
    file_name = model.replace(":", "_").replace(".", "_") + "_associations.csv"
    output_path = os.path.join("..", "data", "results", "association", "top_100_r1", file_name)
    print(f"Starting with model: {model}")
    run_experiment(cues, output_path, model)

['becoming', 'archeology', 'joker', 'hurtful', 'nacho', 'unsafe', 'hummingbird', 'coast', 'immature', 'ligament', 'shield', 'vegetarian', 'parcel', 'madly', 'flunk', 'traveler', 'bubble gum', 'expel', 'leaning', 'juicer', 'hovercraft', 'fabric', 'captivity', 'lamb', 'failure', 'balls', 'governance', 'electricity', 'celebrity', 'masturbate', 'extroverted', 'breed', 'frustrated', 'goodwill', 'dialogue', 'unrealistic', 'hobble', 'unripe', 'boxer', 'laborer', 'atheism', 'autumn', 'repress', 'sweatshop', 'veer', 'mystical', 'redwood', 'gods', 'elbow', 'sterile', 'mailbox', 'rundown', 'execute', 'hitchhike', 'intoxicate', 'complete', 'vomit', 'civilization', 'let', 'gifted', 'whiskers', 'lore', 'Norway', 'petrol', 'Islam', 'mitten', 'cozy', 'hum', 'head', 'mints', 'increasing', 'chauffeur', 'constant', 'zeal', 'periscope', 'oddity', 'monotonous', 'stupid', 'hers', 'scribe', 'campfire', 'cartoon', 'contempt', 'cactus', 'fad', 'since', 'tom', 'devil', 'tuba', 'algae', 'crows', 'cooler', 'marti

In [8]:
# Experiment on the cue set stored in cues_random_100.csv
input_path = os.path.join("..", "data", "datasets", "cues_random_100.csv")
cues = load_cue_words(input_path)

for model in models:
    # Model tags such as "qwen3:0.6b" contain ':' and '.'; both become '_' in the file name.
    file_name = model.replace(":", "_").replace(".", "_") + "_associations.csv"
    output_path = os.path.join("..", "data", "results", "association", "random_100", file_name)
    print(f"Starting with model: {model}")
    run_experiment(cues, output_path, model)

Starting with model: qwen3:0.6b


 10%|█         | 10/100 [01:21<03:19,  2.22s/it] 



 12%|█▏        | 12/100 [01:24<02:38,  1.80s/it]



 16%|█▌        | 16/100 [01:28<01:38,  1.17s/it]



 17%|█▋        | 17/100 [01:29<01:35,  1.15s/it]



 33%|███▎      | 33/100 [01:52<01:59,  1.78s/it]



 40%|████      | 40/100 [02:02<01:47,  1.80s/it]



 58%|█████▊    | 58/100 [02:25<00:52,  1.26s/it]



 59%|█████▉    | 59/100 [02:27<00:54,  1.33s/it]



 63%|██████▎   | 63/100 [02:31<00:42,  1.14s/it]



 66%|██████▌   | 66/100 [02:35<00:47,  1.40s/it]



 70%|███████   | 70/100 [02:42<00:50,  1.68s/it]



 74%|███████▍  | 74/100 [02:47<00:33,  1.27s/it]



 81%|████████  | 81/100 [02:55<00:22,  1.20s/it]



 83%|████████▎ | 83/100 [02:57<00:19,  1.16s/it]



 85%|████████▌ | 85/100 [02:59<00:16,  1.10s/it]



 89%|████████▉ | 89/100 [03:04<00:13,  1.18s/it]



 90%|█████████ | 90/100 [03:05<00:10,  1.09s/it]



 91%|█████████ | 91/100 [03:05<00:09,  1.03s/it]



 93%|█████████▎| 93/100 [03:08<00:07,  1.04s/it]



100%|██████████| 100/100 [03:16<00:00,  1.96s/it]


Done! Saved to ..\data\results\association\random_100\qwen3_0_6b_associations.csv
Starting with model: qwen3:1.7b


 12%|█▏        | 12/100 [06:49<2:40:12, 109.23s/it]



100%|██████████| 100/100 [11:56<00:00,  7.16s/it]  


Done! Saved to ..\data\results\association\random_100\qwen3_1_7b_associations.csv
Starting with model: qwen3:8b


100%|██████████| 100/100 [05:36<00:00,  3.36s/it]


Done! Saved to ..\data\results\association\random_100\qwen3_8b_associations.csv
Starting with model: qwen3:14b


100%|██████████| 100/100 [11:14<00:00,  6.74s/it] 


Done! Saved to ..\data\results\association\random_100\qwen3_14b_associations.csv
Starting with model: qwen3:30b


100%|██████████| 100/100 [1:53:20<00:00, 68.00s/it]  


Done! Saved to ..\data\results\association\random_100\qwen3_30b_associations.csv
Starting with model: gemma3:270m


  1%|          | 1/100 [00:07<11:56,  7.24s/it]




  2%|▏         | 2/100 [00:07<05:09,  3.16s/it]




  3%|▎         | 3/100 [00:07<02:57,  1.83s/it]




  4%|▍         | 4/100 [00:08<01:56,  1.21s/it]




  5%|▌         | 5/100 [00:08<01:23,  1.14it/s]




  6%|▌         | 6/100 [00:08<01:03,  1.49it/s]




  8%|▊         | 8/100 [00:09<00:42,  2.18it/s]




 11%|█         | 11/100 [00:10<00:29,  2.99it/s]




 12%|█▏        | 12/100 [00:10<00:27,  3.22it/s]




 13%|█▎        | 13/100 [00:10<00:26,  3.34it/s]




 14%|█▍        | 14/100 [00:10<00:24,  3.47it/s]




 16%|█▌        | 16/100 [00:11<00:23,  3.55it/s]




 17%|█▋        | 17/100 [00:11<00:23,  3.56it/s]




 20%|██        | 20/100 [00:12<00:26,  3.00it/s]




 22%|██▏       | 22/100 [00:13<00:26,  2.96it/s]




 23%|██▎       | 23/100 [00:13<00:24,  3.14it/s]




 24%|██▍       | 24/100 [00:14<00:23,  3.20it/s]




 26%|██▌       | 26/100 [00:14<00:23,  3.13it/s]




 27%|██▋       | 27/100 [00:14<00:22,  3.28it/s]




 28%|██▊       | 28/100 [00:15<00:20,  3.46it/s]




 29%|██▉       | 29/100 [00:15<00:19,  3.59it/s]




 32%|███▏      | 32/100 [00:16<00:18,  3.64it/s]




 33%|███▎      | 33/100 [00:16<00:18,  3.64it/s]




 35%|███▌      | 35/100 [00:17<00:18,  3.58it/s]




 37%|███▋      | 37/100 [00:17<00:17,  3.59it/s]




 38%|███▊      | 38/100 [00:18<00:17,  3.57it/s]




 40%|████      | 40/100 [00:18<00:17,  3.53it/s]




 41%|████      | 41/100 [00:18<00:16,  3.58it/s]




 42%|████▏     | 42/100 [00:19<00:16,  3.59it/s]




 43%|████▎     | 43/100 [00:19<00:16,  3.46it/s]



 44%|████▍     | 44/100 [00:19<00:16,  3.33it/s]




 45%|████▌     | 45/100 [00:20<00:16,  3.37it/s]




 47%|████▋     | 47/100 [00:20<00:17,  3.01it/s]




 48%|████▊     | 48/100 [00:21<00:16,  3.08it/s]




 49%|████▉     | 49/100 [00:21<00:16,  3.16it/s]




 50%|█████     | 50/100 [00:21<00:15,  3.20it/s]




 51%|█████     | 51/100 [00:22<00:14,  3.30it/s]




 53%|█████▎    | 53/100 [00:22<00:15,  3.09it/s]




 55%|█████▌    | 55/100 [00:23<00:13,  3.40it/s]




 58%|█████▊    | 58/100 [00:24<00:11,  3.59it/s]




 60%|██████    | 60/100 [00:24<00:11,  3.59it/s]




 62%|██████▏   | 62/100 [00:25<00:10,  3.55it/s]




 64%|██████▍   | 64/100 [00:25<00:10,  3.52it/s]




 65%|██████▌   | 65/100 [00:26<00:09,  3.58it/s]




 72%|███████▏  | 72/100 [00:28<00:10,  2.65it/s]




 80%|████████  | 80/100 [00:31<00:05,  3.44it/s]




 81%|████████  | 81/100 [00:31<00:05,  3.48it/s]




 83%|████████▎ | 83/100 [00:32<00:05,  3.38it/s]




 84%|████████▍ | 84/100 [00:32<00:04,  3.52it/s]




 85%|████████▌ | 85/100 [00:32<00:04,  3.54it/s]




 88%|████████▊ | 88/100 [00:33<00:03,  3.43it/s]




 90%|█████████ | 90/100 [00:34<00:02,  3.35it/s]




 91%|█████████ | 91/100 [00:34<00:02,  3.43it/s]




 93%|█████████▎| 93/100 [00:35<00:02,  3.31it/s]




 98%|█████████▊| 98/100 [00:37<00:00,  2.72it/s]




 99%|█████████▉| 99/100 [00:37<00:00,  2.88it/s]




100%|██████████| 100/100 [00:37<00:00,  2.63it/s]


Done! Saved to ..\data\results\association\random_100\gemma3_270m_associations.csv
Starting with model: gemma3:1b


  1%|          | 1/100 [00:10<16:55, 10.26s/it]




  2%|▏         | 2/100 [00:10<07:10,  4.39s/it]




  3%|▎         | 3/100 [00:10<04:06,  2.54s/it]

strongest; joker; Fun
strongest; Joker; Game



  4%|▍         | 4/100 [00:11<02:41,  1.68s/it]

hurtful;pain;severe
hurtful;loss;devastating


  5%|▌         | 5/100 [00:11<01:53,  1.19s/it]

nacho;bitter;sweet
nacho;delicious;warm


  6%|▌         | 6/100 [00:11<01:25,  1.10it/s]

unsafe;second;risk;threat
unsafe;third;crisis;problem


  7%|▋         | 7/100 [00:12<01:07,  1.38it/s]

hum;bird;song;sweet
hum;bird;dance;grace



  8%|▊         | 8/100 [00:12<00:53,  1.72it/s]




  9%|▉         | 9/100 [00:12<00:44,  2.07it/s]



 11%|█         | 11/100 [00:13<00:34,  2.57it/s]

shield;powerful;force
shield;safe;secure


 12%|█▏        | 12/100 [00:13<00:32,  2.69it/s]

vegetarian;healthy;nutritious
vegetarian;plant-based;organic


 13%|█▎        | 13/100 [00:14<00:31,  2.80it/s]

strong;parcel;soft
strong;parcel;warm


 16%|█▌        | 16/100 [00:15<00:26,  3.12it/s]




 19%|█▉        | 19/100 [00:15<00:25,  3.12it/s]

leaning;second;soft
leaning;third;steady


 20%|██        | 20/100 [00:16<00:27,  2.89it/s]

juicer;second association;fresh
juicer;third association;bright



 21%|██        | 21/100 [00:16<00:27,  2.92it/s]

second;vehicle;travel
third;structure;support


 23%|██▎       | 23/100 [00:17<00:24,  3.10it/s]

captivity;wild;free
 captivity;lost;alone


 25%|██▌       | 25/100 [00:17<00:23,  3.17it/s]

failure;severe;dire
failure;unresolved;critical


 26%|██▌       | 26/100 [00:18<00:22,  3.30it/s]



 27%|██▋       | 27/100 [00:18<00:24,  3.03it/s]

governance;second;system
governance;third;policy



 28%|██▊       | 28/100 [00:18<00:22,  3.18it/s]




 29%|██▉       | 29/100 [00:19<00:22,  3.11it/s]

celebrity;icon;legacy
celebrity;visionary;impact


 31%|███       | 31/100 [00:19<00:20,  3.32it/s]




 33%|███▎      | 33/100 [00:20<00:19,  3.48it/s]



 34%|███▍      | 34/100 [00:20<00:18,  3.48it/s]



 35%|███▌      | 35/100 [00:20<00:18,  3.44it/s]




 36%|███▌      | 36/100 [00:21<00:19,  3.36it/s]




 38%|███▊      | 38/100 [00:21<00:20,  3.00it/s]

unripe;pale;brown
unripe;dark;yielding


 39%|███▉      | 39/100 [00:22<00:21,  2.88it/s]

boxer;metal;strong
boxer;dance;woman


 40%|████      | 40/100 [00:22<00:19,  3.07it/s]




 41%|████      | 41/100 [00:22<00:19,  2.96it/s]

strongest;atheistic;concept
strongest;atheist;practice



 42%|████▏     | 42/100 [00:23<00:19,  2.98it/s]

autumn;color;red
autumn;weather;cool


 43%|████▎     | 43/100 [00:23<00:19,  2.93it/s]

revel;second;reveal
rethink;third;rethink


 44%|████▍     | 44/100 [00:23<00:18,  3.07it/s]




 46%|████▌     | 46/100 [00:24<00:16,  3.29it/s]




 47%|████▋     | 47/100 [00:24<00:15,  3.42it/s]



 48%|████▊     | 48/100 [00:25<00:16,  3.09it/s]

gods;second;magic
gods;third;creation


 49%|████▉     | 49/100 [00:25<00:16,  3.08it/s]

elbow;simple;evident
elbow;vital;urgent


 50%|█████     | 50/100 [00:25<00:16,  3.09it/s]

sterile;cold;still
sterile;empty;void


 52%|█████▏    | 52/100 [00:26<00:14,  3.29it/s]




 53%|█████▎    | 53/100 [00:26<00:13,  3.37it/s]




 54%|█████▍    | 54/100 [00:26<00:13,  3.42it/s]




 55%|█████▌    | 55/100 [00:27<00:14,  3.11it/s]

second;pain;ache
third;danger;risk



 56%|█████▌    | 56/100 [00:27<00:13,  3.22it/s]




 57%|█████▋    | 57/100 [00:27<00:14,  2.98it/s]

vomit;second;urgent
vomit;third;immediate



 58%|█████▊    | 58/100 [00:28<00:13,  3.13it/s]



 59%|█████▉    | 59/100 [00:28<00:13,  3.14it/s]

strong;dangerous;severe
strong;vital;true


 60%|██████    | 60/100 [00:28<00:12,  3.30it/s]




 61%|██████    | 61/100 [00:29<00:11,  3.39it/s]



 62%|██████▏   | 62/100 [00:29<00:11,  3.26it/s]

lore;powerful;bright
lore;ancient;clear


 63%|██████▎   | 63/100 [00:29<00:10,  3.39it/s]



 64%|██████▍   | 64/100 [00:30<00:12,  2.96it/s]

petrol;furry;animal;mammal
petrol;metallic;shiny;color


 65%|██████▌   | 65/100 [00:30<00:11,  3.10it/s]




 66%|██████▌   | 66/100 [00:30<00:11,  3.07it/s]

mitten;warm;soft
mitten;strong;quick


 67%|██████▋   | 67/100 [00:31<00:10,  3.05it/s]

cozy;home;safe
cozy;comfort;peace



 69%|██████▉   | 69/100 [00:31<00:09,  3.30it/s]



 70%|███████   | 70/100 [00:32<00:09,  3.12it/s]

mints;delicious;flavorful
mints;digestive;healthy


 71%|███████   | 71/100 [00:32<00:09,  3.00it/s]

increase;vital;life
increase;urgent;danger



 72%|███████▏  | 72/100 [00:32<00:08,  3.14it/s]



 73%|███████▎  | 73/100 [00:33<00:09,  2.99it/s]

constant;practical;real
constant;lasting;long



 74%|███████▍  | 74/100 [00:33<00:08,  3.11it/s]



 75%|███████▌  | 75/100 [00:33<00:08,  2.94it/s]

periscope;second;motion
periscope;third;sense


 76%|███████▌  | 76/100 [00:34<00:07,  3.12it/s]




 77%|███████▋  | 77/100 [00:34<00:07,  3.18it/s]




 78%|███████▊  | 78/100 [00:34<00:06,  3.30it/s]




 79%|███████▉  | 79/100 [00:34<00:06,  3.39it/s]



 80%|████████  | 80/100 [00:35<00:05,  3.47it/s]




 81%|████████  | 81/100 [00:35<00:05,  3.54it/s]




 83%|████████▎ | 83/100 [00:36<00:05,  3.08it/s]

contempt;disrespect;anger
contempt;passive;resentment


 84%|████████▍ | 84/100 [00:36<00:05,  3.20it/s]




 85%|████████▌ | 85/100 [00:36<00:04,  3.21it/s]




 86%|████████▌ | 86/100 [00:37<00:04,  3.24it/s]




 87%|████████▋ | 87/100 [00:37<00:04,  3.05it/s]

tom;quick;fast
tom;steady;true



 88%|████████▊ | 88/100 [00:37<00:03,  3.04it/s]

devil;dark;evil
devil;powerful;urgent



 89%|████████▉ | 89/100 [00:38<00:03,  3.02it/s]

second;throat;voice
third;pulse;heart



 90%|█████████ | 90/100 [00:38<00:03,  2.98it/s]

algae;color;green
algae;habitat;ocean


 91%|█████████ | 91/100 [00:38<00:03,  2.96it/s]

crows;feathers;bright
crows;hunting;sharp


 93%|█████████▎| 93/100 [00:39<00:02,  3.13it/s]

strong;powerful;confident
skill;expertise;achieved


 95%|█████████▌| 95/100 [00:39<00:01,  3.31it/s]




 97%|█████████▋| 97/100 [00:40<00:00,  3.41it/s]




 98%|█████████▊| 98/100 [00:40<00:00,  3.41it/s]




 99%|█████████▉| 99/100 [00:41<00:00,  3.36it/s]




100%|██████████| 100/100 [00:41<00:00,  2.42it/s]



Done! Saved to ..\data\results\association\random_100\gemma3_1b_associations.csv
Starting with model: gemma3:4b


  5%|▌         | 5/100 [00:34<04:59,  3.16s/it]




 12%|█▏        | 12/100 [00:36<00:45,  1.94it/s]




 17%|█▋        | 17/100 [00:38<00:29,  2.82it/s]




 26%|██▌       | 26/100 [00:41<00:23,  3.16it/s]




 28%|██▊       | 28/100 [00:41<00:22,  3.18it/s]




 38%|███▊      | 38/100 [00:44<00:20,  3.07it/s]




 39%|███▉      | 39/100 [00:45<00:19,  3.11it/s]




 43%|████▎     | 43/100 [00:46<00:18,  3.09it/s]




 44%|████▍     | 44/100 [00:46<00:18,  3.11it/s]




 46%|████▌     | 46/100 [00:47<00:17,  3.11it/s]




 51%|█████     | 51/100 [00:49<00:15,  3.17it/s]




 55%|█████▌    | 55/100 [00:50<00:14,  3.14it/s]



 58%|█████▊    | 58/100 [00:51<00:13,  3.11it/s]




 60%|██████    | 60/100 [00:52<00:12,  3.20it/s]




 61%|██████    | 61/100 [00:52<00:12,  3.14it/s]




 62%|██████▏   | 62/100 [00:52<00:11,  3.18it/s]




 67%|██████▋   | 67/100 [00:54<00:10,  3.06it/s]




 72%|███████▏  | 72/100 [00:55<00:08,  3.19it/s]




 76%|███████▌  | 76/100 [00:57<00:07,  3.12it/s]




 77%|███████▋  | 77/100 [00:57<00:07,  3.09it/s]




 78%|███████▊  | 78/100 [00:57<00:07,  3.10it/s]



 79%|███████▉  | 79/100 [00:58<00:06,  3.18it/s]



 80%|████████  | 80/100 [00:58<00:06,  3.22it/s]




 97%|█████████▋| 97/100 [01:04<00:00,  3.07it/s]




 99%|█████████▉| 99/100 [01:04<00:00,  3.11it/s]



100%|██████████| 100/100 [01:04<00:00,  1.54it/s]


Done! Saved to ..\data\results\association\random_100\gemma3_4b_associations.csv
Starting with model: gemma3:12b


100%|██████████| 100/100 [02:04<00:00,  1.24s/it]


Done! Saved to ..\data\results\association\random_100\gemma3_12b_associations.csv
Starting with model: gemma3:27b


100%|██████████| 100/100 [09:47<00:00,  5.87s/it] 

Done! Saved to ..\data\results\association\random_100\gemma3_27b_associations.csv





# Data preparation

In [None]:
def extract_unique_values(input_csv, output_csv, column_name, delimiter=","):
    """
    Extracts all distinct values from a given column in a CSV file and writes them
    into a new CSV file (one value per line, preceded by a header row).

    Values are stripped of surrounding whitespace; empty values are dropped.
    Rows that are too short to contain the column are skipped.

    Args:
        input_csv (str): Path to the input CSV file.
        output_csv (str): Path to the output CSV file.
        column_name (str): Name of the column from which to collect unique values.
        delimiter (str): CSV delimiter (default=",").

    Returns:
        list: A sorted list of unique values.

    Raises:
        ValueError: If ``column_name`` is not a column of the input file
            (including the case of an empty input file).
    """
    unique_values = set()

    # Read input CSV
    with open(input_csv, newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f, delimiter=delimiter)
        # ``fieldnames`` is None for an empty file; treat that as "no columns".
        if column_name not in (reader.fieldnames or []):
            raise ValueError(f"Column '{column_name}' not found in CSV.")

        for row in reader:
            # DictReader fills the missing cells of short rows with None.
            value = (row[column_name] or "").strip()
            if value:
                unique_values.add(value)

    # Sort for consistent output
    unique_list = sorted(unique_values)

    # Write output CSV
    with open(output_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, delimiter=delimiter)
        writer.writerow([column_name])  # header
        writer.writerows([val] for val in unique_list)

    return unique_list

# Collect every distinct value of the "cue" column of the SWOW export into cues.csv
extract_unique_values(
    input_csv=r"C:\Users\peers\Downloads\SWOW-EN18\SWOW-EN.complete.20180827.csv",
    output_csv="cues.csv",
    column_name="cue",
    delimiter=",",
)

In [None]:
import random

# Fixed seed so that re-running this cell reproduces the same cue sample.
RANDOM_SEED = 42

# Load all cue words
all_cues = load_cue_words("cues.csv")

# Select random subset of 100 (or all cues if fewer than 100 are available).
# A dedicated Random instance keeps the draw independent of the global RNG state.
random_subset = random.Random(RANDOM_SEED).sample(all_cues, min(100, len(all_cues)))

# Save to new CSV
# NOTE(review): the experiment cell reads ../data/datasets/cues_random_100.csv, while this
# writes into the working directory - confirm the file is moved there afterwards.
output_file = "cues_random_100.csv"
with open(output_file, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["cue"])  # header
    for cue in random_subset:
        writer.writerow([cue])

print(f"Saved {len(random_subset)} random cue words to {output_file}")

In [None]:
import pandas as pd

# Read the SWOW R1 strength table (tab-separated despite the .csv extension)
df = pd.read_csv(r"C:\Users\peers\Downloads\SWOW-EN18\strength.SWOW-EN.R1.20180827.csv", sep="\t")

# Rows with the largest R1.Strength come first
df_sorted = df.sort_values("R1.Strength", ascending=False)

# After sorting, the first row of each cue is its highest-R1 row; drop the others
df_unique_cues = df_sorted.drop_duplicates("cue")

# The first 100 remaining rows are the top 100 cues
top_100 = df_unique_cues.iloc[:100]

# Write the selection without the index column
top_100.to_csv("top_100_cues_by_R1.csv", index=False)


# Response Strength Analysis

In [None]:
# TODO Output as strength.SWOW.EN.R1.AI with same format
# TODO Look at how many responses are invalid
# TODO Look at top responses from AI and Humans and compare

In [None]:
import os
import pandas as pd
import numpy as np
from collections import Counter
import glob

# Load SWOW strength data (R1 and R123) - they are TAB-delimited
swow_strength_r1 = pd.read_csv(r"C:\Users\peers\Downloads\SWOW-EN18\strength.SWOW-EN.R1.20180827.csv", sep="\t")
swow_strength_r123 = pd.read_csv(r"C:\Users\peers\Downloads\SWOW-EN18\strength.SWOW-EN.R123.20180827.csv", sep="\t")


def _build_strength_lookup(strength_df, strength_column):
    """Map (cue, response) -> strength, with both key parts lower-cased.

    The model side lower-cases cue and response before the lookup, so the SWOW
    keys must be normalised the same way; otherwise capitalised cues can never
    match. If several SWOW rows collapse onto the same lower-cased key, the
    largest strength is kept.
    """
    lookup = {}
    rows = zip(strength_df['cue'], strength_df['response'], strength_df[strength_column])
    for swow_cue, swow_response, strength in rows:
        key = (str(swow_cue).lower(), str(swow_response).lower())
        if key not in lookup or strength > lookup[key]:
            lookup[key] = strength
    return lookup


# Create lookup dictionaries for SWOW strength
# Key: (cue, response) in lower case, Value: strength
swow_r1_lookup = _build_strength_lookup(swow_strength_r1, 'R1.Strength')
swow_r123_lookup = _build_strength_lookup(swow_strength_r123, 'R123.Strength')

print(f"Loaded {len(swow_r1_lookup)} cue-response pairs from SWOW R1")
print(f"Loaded {len(swow_r123_lookup)} cue-response pairs from SWOW R123")

# Load all model files
# NOTE(review): the experiment cells write their CSVs into the sub-folders "top_100_r1" and
# "random_100", which this non-recursive pattern does not match - confirm the intended folder.
model_files = sorted(glob.glob("../data/results/association/*_associations.csv"))
print(f"\nFound {len(model_files)} model files")
total_models = len(model_files)

if total_models == 0:
    # Fail with a clear message instead of a KeyError on the empty result table further down.
    raise FileNotFoundError(
        "No '*_associations.csv' files found in ../data/results/association/"
    )

# Dictionaries to store A1 and A123 responses by cue
all_model_a1 = {}
all_model_a123 = {}

# Load A1 and A123 responses from all models
for model_file in model_files:
    # basename works with both '/' and '\\' separators on the current platform
    model_name = os.path.basename(model_file).replace("_associations.csv", "")
    print(f"Loading responses from {model_name}...")

    model_data = pd.read_csv(model_file)

    for _, row in model_data.iterrows():
        cue = str(row['cue']).lower()
        a1_response = str(row['A1'])
        a2_response = str(row['A2'])
        a3_response = str(row['A3'])

        # Store A1
        all_model_a1.setdefault(cue, {})[model_name] = a1_response

        # Store all three responses for A123
        # NOTE(review): only 'No more responses' is filtered; "Invalid Response" placeholders
        # are counted like real associations - confirm this is intended.
        cue_responses = all_model_a123.setdefault(cue, [])
        for resp in [a1_response, a2_response, a3_response]:
            if resp != 'No more responses':
                cue_responses.append(resp)

# Calculate A1 strength (per unique response)
output_data = []

for cue in sorted(all_model_a1.keys()):
    models_responses_a1 = all_model_a1[cue]

    # Get unique A1 responses for this cue
    unique_a1s = set(models_responses_a1.values())

    for response in unique_a1s:
        if response == 'No more responses':
            continue

        # Count how many models gave this as A1
        count_models_a1 = sum(1 for r in models_responses_a1.values() if r == response)

        # Calculate A1 strength
        model_a1_strength = count_models_a1 / total_models

        # Calculate A123 strength (how often this appears in any position A1/A2/A3)
        if cue in all_model_a123:
            count_a123 = all_model_a123[cue].count(response)
            total_a123_responses = len(all_model_a123[cue])
            model_a123_strength = count_a123 / total_a123_responses if total_a123_responses > 0 else 0
        else:
            model_a123_strength = 0

        # Look up in SWOW (cue is already lower-cased; lookup keys are lower-cased too)
        swow_key = (cue, response.lower())
        swow_r1_strength = swow_r1_lookup.get(swow_key, None)
        swow_r123_strength = swow_r123_lookup.get(swow_key, None)

        output_data.append({
            'cue': cue,
            'response': response,
            'model_a1_count': count_models_a1,
            'total_models': total_models,
            'model_a1_strength': round(model_a1_strength, 3),
            'model_a123_strength': round(model_a123_strength, 3),
            'swow_r1_strength': swow_r1_strength if swow_r1_strength is not None else 'n.a.',
            'swow_r123_strength': swow_r123_strength if swow_r123_strength is not None else 'n.a.'
        })

# Create DataFrame and sort by cue, then by model_a1_strength descending
output_df = pd.DataFrame(output_data)
output_df = output_df.sort_values(['cue', 'model_a1_strength'], ascending=[True, False])

# Format SWOW columns for display (round if numeric)
def format_swow(x):
    """Round a numeric SWOW strength to three decimals; pass the 'n.a.' marker through."""
    if x == 'n.a.':
        return 'n.a.'
    else:
        return round(float(x), 3)

output_df['swow_r1_strength'] = output_df['swow_r1_strength'].apply(format_swow)
output_df['swow_r123_strength'] = output_df['swow_r123_strength'].apply(format_swow)

# Save to CSV
output_file = "../data/results/model_a1_vs_swow_strength.csv"
output_df.to_csv(output_file, index=False)

print("\n" + "="*80)
print(f"Consolidated A1 response strength comparison saved to:")
print(f"{output_file}")
print("="*80)
print(f"\nTotal unique cue-response pairs: {len(output_df)}")
print(f"Total unique cues: {output_df['cue'].nunique()}")

print("\nFirst 20 rows:")
print(output_df.head(20).to_string(index=False))

print("\n\nColumn Explanation:")
print("-" * 80)
print("cue                  : The cue word")
print("response             : The response from models")
print("model_a1_count       : How many models gave this as A1 (first response)")
print("total_models         : Total number of models analyzed")
print("model_a1_strength    : Proportion of models that gave this as A1 (0-1)")
print("model_a123_strength  : Proportion across all A1/A2/A3 positions (0-1)")
print("swow_r1_strength     : Strength in SWOW R1 (first response only)")
print("swow_r123_strength   : Strength in SWOW R123 (all three positions)")
print("-" * 80)