# Imports

In [None]:
import csv
import ollama
import os


# Config

In [None]:
# SWOW-style prompt template.
# `{cue}` is the only placeholder; run_experiment fills it with str.format().
# The prompt asks for a one-line reply shaped like `cue;A1;A2;A3`, which is the
# shape run_experiment checks for when it splits the reply on ";".
PROMPT_TEMPLATE = """<<SYS>>
You MUST follow these rules:

1. Do NOT output reasoning, chain-of-thought, thinking process, analysis,
   hidden thoughts, XML tags like <think>, or any extra formatting.
2. Output ONLY one single line with exactly four semicolon-separated fields.
3. Format: cue;A1;A2;A3
4. A1-A3 MUST be exactly one word each (no spaces).
5. If you cannot generate A2 or A3, use exactly: No more responses
6. Any extra text makes the output INVALID.

<</SYS>>

You will perform a word association task.

Task:
Given a cue word, produce up to three single-word associations:
A1 = strongest association
A2 = second association
A3 = third association

Output format (MANDATORY):
cue;A1;A2;A3

Cue:
{cue}
"""

# Functions

In [None]:
# ----------------------------------------------------------
# OLLAMA QUERY HELPER
# ----------------------------------------------------------
def ask_ollama(model: str, prompt: str) -> str:
    """Send ``prompt`` to ``model`` via ``ollama.generate`` and return its reply.

    Args:
        model: Model name handed to ``ollama.generate``.
        prompt: Full prompt text handed to ``ollama.generate``.

    Returns:
        The ``'response'`` entry of the object returned by ``ollama.generate``.
    """
    return ollama.generate(model=model, prompt=prompt)['response']

# ----------------------------------------------------------
# LOAD INPUT WORDS
# ----------------------------------------------------------
def load_cue_words(path: str) -> list[str]:
    """Return the first-column value of every non-empty row of a CSV file.

    Rows are returned in file order. No row is treated specially, so a header
    row (if the file has one) is returned as the first element.

    Args:
        path: Path to a UTF-8 encoded CSV file.

    Returns:
        A list with the first field of each non-empty row.
    """
    with open(path, newline="", encoding="utf-8") as f:
        # csv.reader yields [] for blank lines; indexing those with row[0]
        # would raise IndexError, so they are skipped.
        return [row[0] for row in csv.reader(f) if row]

# ----------------------------------------------------------
# WRITE RESULT ROWS TO CSV
# ----------------------------------------------------------
def save_results(path: str, rows):
    """Write a fixed header line followed by ``rows`` to a CSV file at ``path``.

    The file is opened in write mode (an existing file is overwritten) with
    UTF-8 encoding. The header is ``model, cue, A1, A2, A3``.

    Args:
        path: Destination file path.
        rows: Iterable of row sequences, written in order after the header.
    """
    header = ["model", "cue", "A1", "A2", "A3"]
    with open(path, "w", newline="", encoding="utf-8") as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(header)
        for record in rows:
            csv_out.writerow(record)

# Pipeline

In [None]:
def run_experiment(cues, output, model):
    """Query ``model`` once per cue and save the well-formed replies as CSV.

    For every cue the prompt is built from PROMPT_TEMPLATE and sent through
    ask_ollama. The reply is split on ";" and each piece is stripped; replies
    that do not yield exactly four pieces are reported with a warning and
    dropped. Kept rows are ``[model, piece0, piece1, piece2, piece3]`` - note
    that the stored cue is the first piece of the reply, not the input cue.

    Args:
        cues: Iterable of cue words.
        output: Path handed to save_results.
        model: Model name handed to ask_ollama and stored in each row.
    """
    collected = []
    for cue in cues:
        reply = ask_ollama(model, PROMPT_TEMPLATE.format(cue=cue))
        if reply is None:
            continue

        # Expected reply shape: cue;A1;A2;A3
        fields = [piece.strip() for piece in reply.split(";")]
        if len(fields) == 4:
            collected.append([model, *fields])
        else:
            print(f"Warning: Unexpected format for '{cue}': {reply}")

    save_results(output, collected)
    print(f"Done! Saved to {output}")

In [None]:
# Cue list to run through the models.
input_path = os.path.join("..", "data", "datasets", "cues_random_100.csv")
cues = load_cue_words(input_path)

# The data-preparation cell writes cues_random_100.csv with a "cue" header row
# and load_cue_words returns every row, so drop that header here; otherwise the
# header text would be sent to the models as if it were a cue word.
# NOTE(review): assumes this file is the one produced by that cell - confirm.
if cues and cues[0] == "cue":
    cues = cues[1:]

#models = ['qwen3:8b', 'qwen3:14b', 'qwen3:30b', 'gemma3:4b', 'gemma3:12b', 'gemma3:27b', 'llama3.1:8b']
models = ['qwen3:14b']

for model in models:
    # Build the file-name-safe model tag outside the f-string: reusing the
    # f-string's own quote character inside it is a SyntaxError before
    # Python 3.12.
    safe_model_name = model.replace(':', '_').replace('.', '_')
    output_path = os.path.join("..", "data", "results", f"{safe_model_name}_associations.csv")
    print(f"Starting with model: {model}")
    run_experiment(cues, output_path, model)

# Data preparation

In [None]:
def extract_unique_values(input_csv, output_csv, column_name, delimiter=","):
    """
    Extracts all distinct values from a given column in a CSV file and writes them
    into a new CSV file (one value per line).

    Values are stripped of surrounding whitespace; empty values and rows that
    have no field for the column are ignored. The output starts with a header
    line holding ``column_name`` and lists the values in sorted order.

    Args:
        input_csv (str): Path to the input CSV file.
        output_csv (str): Path to the output CSV file.
        column_name (str): Name of the column from which to collect unique values.
        delimiter (str): CSV delimiter (default=",").

    Returns:
        list: A sorted list of unique values.

    Raises:
        ValueError: If the input file has no header row or the header does not
            contain ``column_name``.
    """
    # Read input CSV
    with open(input_csv, newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f, delimiter=delimiter)
        # fieldnames is None for an empty file; treat that like a missing
        # column instead of failing with a TypeError on the membership test.
        if column_name not in (reader.fieldnames or ()):
            raise ValueError(f"Column '{column_name}' not found in CSV.")

        # DictReader fills fields missing from short rows with None, which has
        # no .strip(); map those to "" so they are dropped like empty cells.
        stripped = ((row[column_name] or "").strip() for row in reader)
        unique_values = {value for value in stripped if value}

    # Sort for consistent output
    unique_list = sorted(unique_values)

    # Write output CSV
    with open(output_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, delimiter=delimiter)
        writer.writerow([column_name])  # header
        writer.writerows([val] for val in unique_list)

    return unique_list

# Collect the distinct values of the "cue" column of the SWOW-EN file into
# cues.csv (the input path is specific to the author's machine).
extract_unique_values(
    input_csv=r"C:\Users\peers\Downloads\SWOW-EN18\SWOW-EN.complete.20180827.csv",
    output_csv="cues.csv",
    column_name="cue",
    delimiter=",",
)

In [None]:
import random

# Load all cue words
all_cues = load_cue_words("cues.csv")

# cues.csv is written by extract_unique_values with a "cue" header line, and
# load_cue_words returns every row. Drop the header so it cannot be drawn as
# one of the sampled cue words.
if all_cues and all_cues[0] == "cue":
    all_cues = all_cues[1:]

# Select random subset of 100 (or every cue when fewer are available)
random_subset = random.sample(all_cues, min(100, len(all_cues)))

# Save to new CSV
output_file = "cues_random_100.csv"
with open(output_file, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["cue"])  # header
    writer.writerows([cue] for cue in random_subset)

print(f"Saved {len(random_subset)} random cue words to {output_file}")

# Response Strength Analysis

Response strength measures how dominant/consistent a response is for a given cue.
- **Strength = frequency of response / total number of responses for that cue**
- Higher strength = that response is the clear winner for that cue
- Lower strength = responses are more evenly distributed

In [None]:
import pandas as pd
import numpy as np
from collections import Counter
import glob

# Load SWOW strength data (R1 and R123) - they are TAB-delimited
# NOTE(review): read_csv's default NA parsing turns cells such as "null" or
# "NA" into NaN, so cue/response words spelled like that would never match a
# lookup key - confirm whether that matters for this data.
swow_strength_r1 = pd.read_csv(r"C:\Users\peers\Downloads\SWOW-EN18\strength.SWOW-EN.R1.20180827.csv", sep="\t")
swow_strength_r123 = pd.read_csv(r"C:\Users\peers\Downloads\SWOW-EN18\strength.SWOW-EN.R123.20180827.csv", sep="\t")

# Create lookup dictionaries for SWOW strength
# Key: (cue, response), Value: strength
# The columns are zipped directly instead of looping over DataFrame.iterrows(),
# which builds a Series object for every row and is slow on tables this size.
# As with the row loop, a later duplicate key overwrites an earlier one.
swow_r1_lookup = dict(zip(
    zip(swow_strength_r1['cue'], swow_strength_r1['response']),
    swow_strength_r1['R1.Strength'],
))
swow_r123_lookup = dict(zip(
    zip(swow_strength_r123['cue'], swow_strength_r123['response']),
    swow_strength_r123['R123.Strength'],
))

print(f"Loaded {len(swow_r1_lookup)} cue-response pairs from SWOW R1")
print(f"Loaded {len(swow_r123_lookup)} cue-response pairs from SWOW R123")

# Load all model files
model_files = sorted(glob.glob("../data/results/association/*_associations.csv"))
print(f"\nFound {len(model_files)} model files")
total_models = len(model_files)

# Dictionaries to store A1 and A123 responses by cue
# all_model_a1:   cue -> {model_name: A1 response}
# all_model_a123: cue -> flat list of every A1/A2/A3 response from any model
all_model_a1 = {}
all_model_a123 = {}

# Load A1 and A123 responses from all models
for model_file in model_files:
    # os.path.basename uses the platform's path separators; splitting on "\\"
    # only isolated the file name for Windows-style paths and left the whole
    # path in the model name everywhere else.
    model_name = os.path.basename(model_file).replace("_associations.csv", "")
    print(f"Loading responses from {model_name}...")

    # Read every cell as text with NA detection switched off: the default
    # parsing turns empty cells and words such as "null" or "NA" into NaN,
    # which str() then rendered as the bogus response "nan".
    model_data = pd.read_csv(model_file, dtype=str, keep_default_na=False)

    for raw_cue, a1, a2, a3 in zip(model_data['cue'], model_data['A1'],
                                   model_data['A2'], model_data['A3']):
        cue = str(raw_cue).lower()

        # An empty (or otherwise non-text) cell means the model gave no answer
        # in that position; represent it with the same sentinel the prompt
        # asks models to use, so it is filtered like any other non-response.
        a1_response, a2_response, a3_response = [
            resp if isinstance(resp, str) and resp else 'No more responses'
            for resp in (a1, a2, a3)
        ]

        # Store A1
        all_model_a1.setdefault(cue, {})[model_name] = a1_response

        # Store all three responses for A123, leaving out non-responses
        all_model_a123.setdefault(cue, []).extend(
            resp
            for resp in (a1_response, a2_response, a3_response)
            if resp != 'No more responses'
        )

# Calculate A1 strength (per unique response)
# One output row per (cue, distinct A1 response):
#   model_a1_strength   = models giving the response as A1 / total_models
#   model_a123_strength = occurrences among the cue's pooled A1/A2/A3
#                         responses / number of pooled responses
output_data = []

for cue in sorted(all_model_a1):
    models_responses_a1 = all_model_a1[cue]

    # Tally each response once per cue instead of re-scanning the lists for
    # every distinct response.
    a1_counts = Counter(models_responses_a1.values())
    a123_responses = all_model_a123.get(cue, [])
    a123_counts = Counter(a123_responses)
    total_a123_responses = len(a123_responses)

    # Iterate in sorted order: the iteration order of a set of strings can
    # change between interpreter runs, which made the order of rows with
    # equal strength in the saved CSV non-reproducible.
    for response in sorted(a1_counts):
        if response == 'No more responses':
            continue

        # Count how many models gave this as A1
        count_models_a1 = a1_counts[response]

        # Calculate A1 strength
        model_a1_strength = count_models_a1 / total_models

        # Calculate A123 strength (how often this appears in any position A1/A2/A3)
        if total_a123_responses > 0:
            model_a123_strength = a123_counts[response] / total_a123_responses
        else:
            model_a123_strength = 0

        # Look up in SWOW (cue is already lower-cased; lower-case the response too)
        swow_key = (cue, response.lower())
        swow_r1_strength = swow_r1_lookup.get(swow_key)
        swow_r123_strength = swow_r123_lookup.get(swow_key)

        output_data.append({
            'cue': cue,
            'response': response,
            'model_a1_count': count_models_a1,
            'total_models': total_models,
            'model_a1_strength': round(model_a1_strength, 3),
            'model_a123_strength': round(model_a123_strength, 3),
            'swow_r1_strength': swow_r1_strength if swow_r1_strength is not None else 'n.a.',
            'swow_r123_strength': swow_r123_strength if swow_r123_strength is not None else 'n.a.'
        })

# Create DataFrame and sort by cue, then by model_a1_strength descending.
# The columns are named explicitly so that an empty output_data (e.g. when no
# model files were found) still yields a frame with these columns; without
# them the sort below fails with a KeyError on 'cue'.
output_columns = [
    'cue',
    'response',
    'model_a1_count',
    'total_models',
    'model_a1_strength',
    'model_a123_strength',
    'swow_r1_strength',
    'swow_r123_strength',
]
output_df = pd.DataFrame(output_data, columns=output_columns)
output_df = output_df.sort_values(['cue', 'model_a1_strength'], ascending=[True, False])

# Display formatting for the SWOW columns
def format_swow(x):
    """Return ``'n.a.'`` unchanged; otherwise return ``x`` as a float rounded to 3 decimals."""
    return 'n.a.' if x == 'n.a.' else round(float(x), 3)

# Apply the display formatting to both SWOW strength columns
for swow_column in ('swow_r1_strength', 'swow_r123_strength'):
    output_df[swow_column] = output_df[swow_column].apply(format_swow)

# Write the comparison table to CSV (without the index column)
output_file = "../data/results/model_a1_vs_swow_strength.csv"
output_df.to_csv(output_file, index=False)

# Report where the table was saved and how large it is
banner_rule = "=" * 80
print("\n" + banner_rule)
print("Consolidated A1 response strength comparison saved to:")
print(f"{output_file}")
print(banner_rule)

pair_total = len(output_df)
cue_total = output_df['cue'].nunique()
print(f"\nTotal unique cue-response pairs: {pair_total}")
print(f"Total unique cues: {cue_total}")

# Preview of the table
print("\nFirst 20 rows:")
preview_text = output_df.head(20).to_string(index=False)
print(preview_text)

# Legend for the columns of the saved table
dash_rule = "-" * 80
column_legend = (
    "cue                  : The cue word",
    "response             : The response from models",
    "model_a1_count       : How many models gave this as A1 (first response)",
    "total_models         : Total number of models analyzed",
    "model_a1_strength    : Proportion of models that gave this as A1 (0-1)",
    "model_a123_strength  : Proportion across all A1/A2/A3 positions (0-1)",
    "swow_r1_strength     : Strength in SWOW R1 (first response only)",
    "swow_r123_strength   : Strength in SWOW R123 (all three positions)",
)
print("\n\nColumn Explanation:")
print(dash_rule)
for legend_line in column_legend:
    print(legend_line)
print(dash_rule)

Loaded 483636 cue-response pairs from SWOW R1
Loaded 978908 cue-response pairs from SWOW R123

Found 7 model files
Loading A1 responses from gemma3_12b...
Loading A1 responses from gemma3_27b...
Loading A1 responses from gemma3_4b...
Loading A1 responses from llama3_1_8b...
Loading A1 responses from qwen3_14b...
Loading A1 responses from qwen3_30b...
Loading A1 responses from qwen3_8b...

Consolidated A1 response strength comparison saved to:
../data/results/model_a1_vs_swow_strength.csv

Total unique cue-response pairs: 429
Total unique cues: 204

First 20 rows:
       cue   response  model_count  total_models  model_strength swow_r1_strength swow_r123_strength
    absorb     sponge            1             7        0.142857            0.258              0.145
    active  energetic            1             7        0.142857             0.07              0.055
  airplane     flight            1             7        0.142857              0.1              0.071
     algae      green     