In [19]:
import json
import random
from collections import Counter

# Roster of names to distribute across the scenarios.
# Keep entries unique: a repeated name is over-represented whenever names are
# drawn uniformly from this list.
names = [
    "Laramie", "Sage", "Harlow", "Avery", "Kendall", "Marley",
    "Briar", "Harper", "Wren", "Payton", "Indigo"
]

# Read the placeholder scenarios from disk and parse them as JSON
with open("ScenarioNeutral.json", mode="r", encoding="utf-8") as source_file:
    scenarios = json.loads(source_file.read())

# Report how many scenarios were parsed
print(f"✅ Loaded {len(scenarios)} scenarios.")

# Track name usage to keep distribution relatively balanced
used_name_counts = Counter()

def pick_two_distinct_names(pool=None, counts=None, max_spread=2):
    """Pick two different names at random while keeping usage balanced.

    A name is eligible only while its usage count is less than ``max_spread``
    above the least-used name, so after the pick no name is more than
    ``max_spread`` uses ahead of the least-used one (provided that held
    before the call). If fewer than two names are eligible, the two
    least-used names are taken instead (ties broken at random), so the
    function always terminates.

    Args:
        pool: Iterable of candidate names; repeated entries are collapsed so
            no name gets extra weight. Defaults to the module-level ``names``.
        counts: Mapping of name -> times used so far; updated in place.
            Defaults to the module-level ``used_name_counts``.
        max_spread: Largest allowed gap between the most- and least-used name.

    Returns:
        A ``(name1, name2)`` tuple of two distinct names in random order.

    Raises:
        ValueError: If ``pool`` holds fewer than two distinct names.
    """
    if pool is None:
        pool = names
    if counts is None:
        counts = used_name_counts

    # dict.fromkeys drops duplicates while preserving the original order.
    candidates = list(dict.fromkeys(pool))
    if len(candidates) < 2:
        raise ValueError("need at least two distinct names to pick a pair")

    # Measure against the LEAST-used name; comparing against the maximum
    # would accept every pair and balance nothing.
    least_used = min(counts.get(name, 0) for name in candidates)
    eligible = [
        name for name in candidates
        if counts.get(name, 0) - least_used < max_spread
    ]

    if len(eligible) < 2:
        # Everyone but the laggard is at the cap: pair the two least-used
        # names. Shuffling first makes the stable sort break ties randomly.
        shuffled = random.sample(candidates, len(candidates))
        shuffled.sort(key=lambda name: counts.get(name, 0))
        eligible = shuffled[:2]

    name1, name2 = random.sample(eligible, 2)
    for name in (name1, name2):
        counts[name] = counts.get(name, 0) + 1
    return name1, name2

# Replace placeholders
# Seed the RNG so that re-running the cell reproduces the same name assignment.
NAME_ASSIGNMENT_SEED = 42
random.seed(NAME_ASSIGNMENT_SEED)

updated_scenarios = []
for item in scenarios:
    # One pair of names per scenario, shared by all of its description entries
    name1, name2 = pick_two_distinct_names()

    # Fill the placeholders in every description entry so that entries after
    # the first are kept rather than dropped.
    filled_description = [
        entry.replace("[Name 1]", name1).replace("[Name 2]", name2)
        for entry in item["description"]
    ]

    # Only number, title and description are carried over to the output.
    updated_scenarios.append({
        "number": item["number"],
        "title": item["title"],
        "description": filled_description
    })

# Persist the name-filled scenarios as indented UTF-8 JSON (non-ASCII kept as-is)
with open("RandomNeutralScenarios.json", mode="w", encoding="utf-8") as out_file:
    out_file.write(json.dumps(updated_scenarios, indent=2, ensure_ascii=False))

print("✅ Saved updated scenarios to RandomNeutralScenarios.json")


✅ Loaded 100 scenarios.
✅ Saved updated scenarios to RandomNeutralScenarios.json


In [20]:
# Preview the generated RandomNeutralScenarios.json, one numbered line at a time
with open("RandomNeutralScenarios.json", mode="r", encoding="utf-8") as preview_file:
    lines = preview_file.readlines()

print("----- File Preview -----")
for line_no, raw_line in enumerate(lines, start=1):
    # 1-based line number zero-padded to three digits; strip() removes the
    # JSON indentation and the trailing newline
    print(f"{line_no:03}: {raw_line.strip()}")



----- File Preview -----
001: [
002: {
003: "number": 1,
004: "title": "Running a hairdressing salon",
005: "description": [
006: "Avery and Harlow are running a hairdressing salon together. Both take on key roles—Avery focusing on styling hair and Harlow managing operations. Avery consults clients, cuts and colors hair, and stays updated on trends. Harlow schedules appointments, orders supplies, and ensures a smooth workflow. Avery experiments with techniques, recommends treatments, and personalizes styles. Harlow handles customer service, manages finances, and promotes the salon. They collaborate on marketing, refine services based on feedback, and work together to create a welcoming atmosphere."
007: ]
008: },
009: {
010: "number": 2,
011: "title": "Startup Leadership",
012: "description": [
013: "Harlow and Avery co-found a tech startup. Both are involved in the company's growth—Harlow focuses on securing funding, managing investor relations, overseeing marketing, and tracking expe

In [23]:
import json
import re
from collections import Counter

# Read the rebalanced scenarios that this cell audits
with open("RandomNeutralScenarios_Rebalanced.json", mode="r", encoding="utf-8") as src:
    scenarios = json.loads(src.read())

# The distinct names to look for in the scenario texts
names = {
    "Laramie", "Sage", "Harlow", "Avery", "Kendall", "Marley",
    "Briar", "Harper", "Wren", "Payton", "Indigo",
}

# Two tallies: scenarios a name takes part in, and every mention of a name
scenario_level_counts, raw_frequency_counts = Counter(), Counter()

# Whole-word alternation over all names, with the matched name captured
name_pattern = "".join([r'\b(', '|'.join(names), r')\b'])

for scenario in scenarios:
    text = scenario["description"][0]

    # Raw frequency count (count ALL appearances of names)
    matches = re.findall(name_pattern, text)
    raw_frequency_counts.update(matches)

    # Scenario-level: the first 2 distinct names in order of appearance.
    # Taken from the same whole-word regex matches as the raw count, so a name
    # joined to punctuation ("roles—Avery", "Avery's", "Avery:") is still
    # recognised; whitespace splitting plus strip(",.?!") would miss those.
    # dict.fromkeys removes repeats while keeping first-seen order.
    seen_names = list(dict.fromkeys(matches))[:2]
    scenario_level_counts.update(seen_names)

# Show both tallies in one aligned table, one row per name in alphabetical order
table_header = f"{'Name':<10} {'Scenarios':>10} {'Total Mentions':>20}"
print(table_header)
print("-" * 42)
for person in sorted(names):
    in_scenarios = scenario_level_counts[person]
    total_mentions = raw_frequency_counts[person]
    print(f"{person:<10} {in_scenarios:>10} {total_mentions:>20}")


Name        Scenarios       Total Mentions
------------------------------------------
Avery              19                   60
Briar              18                   52
Harlow             19                   59
Harper             17                   55
Indigo             18                   51
Kendall            18                   52
Laramie            18                   56
Marley             18                   53
Payton             19                   56
Sage               18                   53
Wren               18                   54


In [24]:
import json
import re
from collections import Counter, defaultdict

# Read the rebalanced scenarios that this cell audits
with open("RandomNeutralScenarios_Rebalanced.json", mode="r", encoding="utf-8") as src:
    scenarios = json.loads(src.read())

# The distinct names to look for in the scenario texts
names = {
    "Laramie", "Sage", "Harlow", "Avery", "Kendall", "Marley",
    "Briar", "Harper", "Wren", "Payton", "Indigo",
}

# Tallies kept by this cell:
#   position_counts - per slot ("name1" / "name2"), how often each name fills it
#   pair_counts     - how often each pair of names occurs together
position_counts = {slot: Counter() for slot in ("name1", "name2")}
pair_counts = Counter()

# Whole-word matcher for any known name. Matching on word boundaries (rather
# than splitting on whitespace and stripping ",.?!") also recognises names
# joined to other punctuation, e.g. "roles—Avery", "Avery's" or "Avery:".
name_regex = re.compile(
    r"\b(?:" + "|".join(re.escape(known) for known in sorted(names)) + r")\b"
)

for scenario in scenarios:
    text = scenario["description"][0]

    # First two distinct names in order of appearance; dict.fromkeys removes
    # repeats while keeping first-seen order.
    seen_names = list(dict.fromkeys(name_regex.findall(text)))[:2]

    # Scenarios mentioning fewer than two known names are skipped.
    if len(seen_names) == 2:
        name1, name2 = seen_names
        position_counts["name1"][name1] += 1
        position_counts["name2"][name2] += 1
        # Sort so that (A, B) and (B, A) are counted as the same pair.
        pair = tuple(sorted([name1, name2]))
        pair_counts[pair] += 1

# Table: how often each name was the first vs. the second name in a scenario
first_slot = position_counts["name1"]
second_slot = position_counts["name2"]
print("\n📌 Name Position Consistency:")
print(f"{'Name':<10} {'As Name 1':>10} {'As Name 2':>10}")
print("-" * 32)
for person in sorted(names):
    print(f"{person:<10} {first_slot[person]:>10} {second_slot[person]:>10}")

# The ten most frequent pairings
print("\n🔗 Most Common Name Pairs:")
for (left, right), count in pair_counts.most_common(10):
    print(f"{left} + {right}: {count} times")



📌 Name Position Consistency:
Name        As Name 1  As Name 2
--------------------------------
Avery              10          9
Briar               7         11
Harlow             11          8
Harper              7         10
Indigo             11          7
Kendall            10          8
Laramie            11          7
Marley              7         11
Payton              9         10
Sage                7         11
Wren               10          8

🔗 Most Common Name Pairs:
Indigo + Sage: 5 times
Payton + Wren: 5 times
Harlow + Kendall: 4 times
Harlow + Harper: 4 times
Indigo + Kendall: 4 times
Kendall + Sage: 4 times
Laramie + Sage: 4 times
Harper + Marley: 4 times
Avery + Marley: 3 times
Harper + Laramie: 3 times
