# Sustaining Cyber Awareness Paper Code

In [None]:
%pip install -q matplotlib pandas numpy scipy

In [None]:
import csv
import collections
from collections import Counter, defaultdict
import json
from datetime import datetime
import calendar
import itertools
import math

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.colors import Normalize
from scipy.stats import spearmanr

In [None]:
# Input path of the raw Terranova CSV report and output path of the converted JSON report
REPORT_CSV = 'data/report.csv'
REPORT_JSON = 'data/report.json'

## Section 1: Conversion

Converting the data from the output format of Terranova CSV to a JSON with pre-parsed variables

In [None]:
# Load the Terranova CSV export: the first record is the header, the rest are events.
# Undecodable bytes are replaced rather than aborting the read.
with open(REPORT_CSV, mode='r', newline='', encoding='utf-8', errors='replace') as file:
    data = list(csv.reader(file, delimiter=',', quotechar='"'))

header, rows = data[0], data[1:]
header[0] = 'Scenario'  # the first column is always addressed as 'Scenario'
data = None             # drop the reference to the combined list
len(rows), header

In [None]:
def coli(name):
    """Return the zero-based column position of ``name`` in the CSV ``header``.

    Raises ``ValueError`` (from ``list.index``) when the column does not exist.
    """
    position = header.index(name)
    return position

In [None]:
# Tally how many report rows belong to each company
unique_company_counter = Counter(row[coli('Company')] for row in rows)

unique_company_counter

In [None]:
# Tally rows per group; the group is the part of the simulation title before the first ' - '
unique_group_counter = Counter(
    row[coli('Simulation Title')].split(' - ')[0] for row in rows
)

unique_group_counter

In [None]:
# Convert CSV to JSON
#
# Every CSV row becomes one dict in `report` (scenario, group, title, date,
# outcome, flagged_unsafe, person, properties); the list is then written to REPORT_JSON.

# Exclude companies with too few employees
# (entries are compared against the e-mail domain of each row)
too_small_companies = [ '... .com', ]

report = []
for row in rows:
    # 'Simulation Title' is expected to look like '<group> - <title>'
    title_parts = row[coli('Simulation Title')].split(' - ')
    assert len(title_parts) >= 2, f"Unexpected Simulation Title format: {row[coli('Simulation Title')]}"
    
    # The company is identified by the text after the last '@' of the e-mail address
    email = row[coli('Email')]
    email_domain = email.split('@')[-1]
    if email_domain in too_small_companies:
        continue
    
    added = {
        'scenario': row[coli('Scenario')],
        # Strip the known title suffixes so that variants of one group share a name
        'group': title_parts[0].replace(' (without Head of Legal)', '').replace(' (1)', ''),
        'title': ' - '.join(title_parts[1:]),
        'date': row[coli('Simulation Event Date')],
        # Any event other than the three listed ones is recorded as 'Nothing'
        'outcome': row[coli('Event Name')] if row[coli('Event Name')] in ['Clicked', 'AttachmentDownloaded', 'CompletedForm'] else 'Nothing',
        'flagged_unsafe': row[coli('Event0None1Unsafe')] == '1',
        
        'person': {
            # Values without an '@' are stored as 'anonymous'
            'email': row[coli('Email')] if '@' in row[coli('Email')] else 'anonymous',
            'is_male': row[coli('F0M1')] == '1',
            'country': row[coli('Country')],
            'company': email_domain,
        },
        
        # Flags of the phishing template; a flag is True only when the CSV cell is exactly '1'
        'properties': {
            'personalized': row[coli('Personal')] == '1',
            'urgent': row[coli('Urgency')] == '1',
            'authority': row[coli('Authority')] == '1',
            'internal': row[coli('Corporate')] == '1',
            'fear': row[coli('Fear')] == '1',
            'curiosity': row[coli('Curiosity')] == '1',
            'help': row[coli('Help')] == '1',
            'reward': row[coli('Greed')] == '1',
        }
    }
    
    # Fix an invalid property combination in the data
    if added['scenario'] == 'Google Drive - File is pending approval':
        added['properties']['authority'] = True
        added['properties']['curiosity'] = True
    
    # Sanity checks on mutually exclusive columns; rows where the first column is empty are accepted
    assert row[coli('Personal')] != row[coli('Anonym')] or row[coli('Personal')] == '', f"An event cannot be both personal and non personal: {row}"
    assert row[coli('Corporate')] != row[coli('External')] or row[coli('Corporate')] == '', f"An event cannot be both internal and external: {row}"
    report.append(added)
    
# Persist the converted report; ensure_ascii=False keeps non-ASCII characters unescaped
with open(REPORT_JSON, 'w', encoding='utf-8') as f:
    json.dump(report, f, ensure_ascii=False, indent=4)

## Section 2: Charts & Statistics

In [None]:
# Compute monthly success rates by group and plot

def parse_dt(s):
    """Parse a report timestamp string into a ``datetime``.

    Two layouts are tried in order: ``YYYY-MM-DD HH:MM:SS.ffffff`` (with
    fractional seconds) and ``YYYY-MM-DD HH:MM:SS``. Returns ``None`` when
    the string matches neither layout.
    """
    for layout in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S"):
        try:
            return datetime.strptime(s, layout)
        except ValueError:
            continue
    return None

# Decide what counts as a successful phishing event
# Here we consider records with flagged_unsafe == True as success
# You can change this to outcome in {"Clicked", "AttachmentDownloaded", "CompletedForm"}
def is_success(row):
    """Tell whether a report record counts as a successful phishing event.

    The ``flagged_unsafe`` field decides whenever it is present. Otherwise the
    record is a success only if its ``outcome`` is one of the unsafe actions.
    Records with neither field are not successes.
    """
    unsafe_outcomes = {"Clicked", "AttachmentDownloaded", "CompletedForm"}
    if 'flagged_unsafe' not in row:
        return 'outcome' in row and row['outcome'] in unsafe_outcomes
    return bool(row['flagged_unsafe'])

# Aggregate counts per group per month for a given year
# Structure: counts[group][month] -> {'success': int, 'total': int}
counts = collections.defaultdict(lambda: collections.defaultdict(lambda: {'success': 0, 'total': 0}))

# Choose latest year present (should only be 2024 in our data)
# Records whose date cannot be parsed are ignored; max() raises if no record has a valid date.
years = set()
for r in report:
    dt = parse_dt(r.get('date', ''))
    if dt:
        years.add(dt.year)
year_to_plot = max(years)

for r in report:
    dt = parse_dt(r.get('date', ''))
    if not dt or dt.year != year_to_plot:
        continue
    grp = r.get('person', {}).get('email', '@').split('@')[-1]  # Group by email domain
    m = dt.month
    counts[grp][m]['total'] += 1
    if is_success(r):
        counts[grp][m]['success'] += 1

# Compute total success percentage across all groups by month
# Months without any event get 0.0 instead of a division by zero.
totals_by_month = {m: {'success': 0, 'total': 0} for m in range(1, 13)}
for m in range(1, 13):
    for mdata in counts.values():
        c = mdata.get(m, {'success': 0, 'total': 0})
        totals_by_month[m]['success'] += c['success']
        totals_by_month[m]['total'] += c['total']
pct_all = [
    (totals_by_month[m]['success'] / totals_by_month[m]['total'] * 100.0) if totals_by_month[m]['total'] else 0.0
    for m in range(1, 13)
]

# Prepare data for plotting
months = [calendar.month_abbr[m] for m in range(1, 13)]

plt.figure(figsize=(10, 6))
for grp, mdata in sorted(counts.items()):
    # Monthly success rate of this group in percent (0.0 when the group has no events that month)
    pct = []
    for m in range(1, 13):
        c = mdata.get(m, {'success': 0, 'total': 0})
        rate = (c['success'] / c['total'] * 100.0) if c['total'] else 0.0
        pct.append(rate)
    # Replace exact-zero rates by neighbouring values: January copies February,
    # December copies November, any other month takes the mean of its two neighbours.
    # NOTE(review): a genuine 0% month cannot be told apart from a month without data here,
    # and values filled in earlier in this loop feed into later ones - confirm this is intended.
    for i, p in enumerate(pct):
        if p == 0.0:
            if i == 0:
                pct[0] = pct[1]
            elif i == len(pct) - 1:
                pct[i] = pct[i - 1]
            else:
                pct[i] = (pct[i - 1] + pct[i + 1]) / 2
    plt.plot(months, pct, marker='o', label=str(grp))

# Plot overall line across all groups
plt.plot(months, pct_all, marker='o', linestyle='--', linewidth=2.5, color='black', label='All groups')

plt.title(f'Successful phishing rate by group - {year_to_plot}')
plt.xlabel('Month')
plt.ylabel('Success rate (%)')
plt.ylim(0, 40)  # fixed y-range: rates above 40% fall outside the visible area
plt.grid(True, linestyle='--', alpha=0.3)
# Place the legend outside the axes, to the right of the plot
plt.legend(title='Group', bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0.)
plt.tight_layout()
plt.show()

In [None]:
# Overall monthly success rates
#
# success_months[i] / total_months[i]: flagged-unsafe events and all events in
# month i+1, counted over every record in `report`.
# success_percent[i]: the corresponding rate in percent.
success_months = [0] * 12
total_months = [0] * 12

for r in report:
    # The month is the second '-'-separated field of the date string
    month = int(r['date'].split('-')[1])
    if r['flagged_unsafe']:
        success_months[month-1] += 1
    total_months[month-1] += 1

# A month without any event yields 0.0 (the same convention as pct_all)
# instead of raising ZeroDivisionError.
success_percent = [ (s/t*100) if t else 0.0 for s, t in zip(success_months, total_months) ]

success_months, total_months, success_percent

In [None]:
# Overall success-rate chart: a single dashed line built from pct_all, months
# and year_to_plot, which must already be defined.
fig, ax = plt.subplots(figsize=(10, 6))

ax.plot(
    months,
    pct_all,
    marker='o',
    linestyle='--',
    linewidth=2.5,
    color='black',
    label='All groups',
)
ax.set_title(f'Overall successful phishing rate - {year_to_plot}')
ax.set_xlabel('Month')
ax.set_ylabel('Success rate (%)')
ax.grid(True, linestyle='--', alpha=0.3)
ax.legend(loc='upper left')
fig.tight_layout()
plt.show()

In [None]:
# Enhanced overall chart with shaded periods and training markers
#
# Plots success_percent per month, shades the vacation and hiring periods,
# marks the training months and an industry-average reference line, and
# saves the figure to plots/success_over_time.pdf.
plt.figure(figsize=(9, 5))
months = [calendar.month_abbr[m] for m in range(1, 13)]
print([round(p, 1) for p in success_percent])
plt.plot(months, success_percent, marker='o', linestyle='--', linewidth=2.5, color='red', label='All participants')

# Shade background for Jul-Aug (vacation period) and for the hiring periods (May, Sep-Oct).
# Months are categories on the x-axis, so positions are list indices; the
# fractional offsets below are hand-tuned band edges.
ax = plt.gca()
jul_i, aug_i = months.index('Jul'), months.index('Aug')
sep_i, oct_i = months.index('Sep'), months.index('Oct')
may_i = months.index('May')
ax.axvspan(jul_i - 0.15, aug_i + 0.2, color='tab:blue', alpha=0.12, zorder=0)
ax.axvspan(sep_i - 0.8, oct_i -0.2, color='tab:green', alpha=0.12, zorder=0)
ax.axvspan(may_i - 0.5, may_i + 0.5, color='tab:green', alpha=0.12, zorder=0)

# Mark global training months (June, October)
training_months = ['Jun', 'Oct']
for tm in training_months:
    plt.axvline(x=tm, color='tab:orange', linestyle=':', linewidth=2, alpha=0.9)
for tm in training_months:
    idx = months.index(tm)
    # marker='' is the "draw nothing" marker, so no glyph is expected on the curve
    plt.scatter([tm], [success_percent[idx]], color='tab:orange', s=60, marker='')
# Legend proxy for training marker/line
plt.plot([], [], color='tab:orange', linestyle=':', marker='', markerfacecolor='tab:orange', label='Security training')

# Reference line plus a legend proxy drawn in the same colour as the line itself
plt.axhline(y=4.1, color='green', linestyle=':')
plt.plot([], [], color='green', linestyle=':', marker='', markerfacecolor='green', label='Industry average (4.1%)')

# Add text labels near the top of shaded regions
ax.set_ylim(0, max(success_percent) * 1.1)
y_top = ax.get_ylim()[1]
ax.text((jul_i + aug_i) / 2, y_top * 0.95, 'Vacation\nperiod', ha='center', va='top', fontsize=9, color='tab:blue')
ax.text((sep_i + oct_i) / 2 - 0.5, y_top * 0.95, 'Hiring\nperiod', ha='center', va='top', fontsize=9, color='tab:green')
ax.text((may_i), y_top * 0.95, 'Hiring\nperiod', ha='center', va='top', fontsize=9, color='tab:green')

plt.xlabel('Month')
plt.ylabel('Phishing success rate (%)')
plt.grid(True, linestyle='--', alpha=0.3)

# Extend legend with shaded period entries (pass both handles and labels).
# Both hiring bands share one legend entry.
period_handles = [
    Patch(facecolor='tab:blue', alpha=0.12, edgecolor='none', label='Vacation period'),
    Patch(facecolor='tab:green', alpha=0.12, edgecolor='none', label='Hiring period'),
]
handles, labels = ax.get_legend_handles_labels()
handles = handles + period_handles
labels = labels + [h.get_label() for h in period_handles]
plt.legend(handles=handles, labels=labels, loc='lower left')

plt.tight_layout()
plt.savefig('plots/success_over_time.pdf')
plt.show()

In [None]:
# Build matrix of success rates by group (rows) and month (cols) for the selected year on a heatmap

months = [calendar.month_abbr[m] for m in range(1, 13)]
# Rows are ordered by the total number of events of the group, largest first
all_groups = sorted(counts.keys(), key=lambda g: sum(counts[g].get(m, {'total': 0})['total'] for m in range(1, 13)), reverse=True)

# Cells without any event stay NaN in `data` and are left unannotated below
data = np.full((len(all_groups), 12), np.nan, dtype=float)  # percent
succ = np.zeros((len(all_groups), 12), dtype=int)           # flagged successes
tot  = np.zeros((len(all_groups), 12), dtype=int)           # totals
for i, grp in enumerate(all_groups):
    for m in range(1, 13):
        c = counts[grp].get(m, {'success': 0, 'total': 0})
        if c['total']:
            data[i, m-1] = (c['success'] / c['total']) * 100.0
            succ[i, m-1] = int(c['success'])
            tot[i, m-1]  = int(c['total'])

# Figure height grows with the number of groups, with a minimum of 4 inches
fig, ax = plt.subplots(figsize=(12, max(4, 0.4*len(all_groups) + 2)))
# plt.get_cmap is used because plt.cm.get_cmap no longer exists in recent Matplotlib releases
cmap = plt.get_cmap('YlOrRd')
im = ax.imshow(data, aspect='auto', interpolation='nearest', cmap=cmap, vmin=0, vmax=100)

# Axes ticks and labels
ax.set_xticks(range(12))
ax.set_xticklabels(months)
ax.set_yticks(range(len(all_groups)))
ax.set_yticklabels([str(g) for g in all_groups])
ax.set_xlabel('Month')
ax.set_ylabel('Group')

# Annotate each cell with percentage on first line and (flagged,total) below
for i in range(len(all_groups)):
    for j in range(12):
        val = data[i, j]
        if not np.isnan(val):
            ax.text(j, i, f'{val:.0f}%\n({succ[i, j]},{tot[i, j]})', ha='center', va='center', fontsize=8, color='black', linespacing=1.2)

cbar = plt.colorbar(im, ax=ax, shrink=0.85)
cbar.set_label('Success rate (%)')

plt.tight_layout()
plt.show()

In [None]:
# Count successful phishing events per participant across the entire dataset
#
# Produces a bar chart of "times fell victim" versus the share of participants
# and saves it to plots/victim_count_distribution.pdf.
# NOTE(review): records whose e-mail was stored as 'anonymous' all share one key
# and are therefore counted as a single participant - confirm this is acceptable.

counts_by_email = Counter()
for r in report:
    email = (r.get('person') or {}).get('email')
    if not email:
        continue
    # Adding 0 still registers the participant, so never-phished people are counted too
    counts_by_email[email] += 1 if is_success(r) else 0
total_unique_people = len(counts_by_email)

# Histogram: times fell victim (k) -> number of people
hist = Counter(counts_by_email.values())
xs = list(range(1, max(hist, default=0) + 1))
ys = [hist.get(k, 0) for k in xs]

# Add zero-success people: everyone who is not in one of the k >= 1 buckets
xs = [0] + xs
ys = [total_unique_people - sum(ys)] + ys

# Colors for bars: green for "never fell victim", red for every k >= 1
colors = [ 'green' ] + [ 'red' ] * (len(xs) - 1)

# Normalize results to percentages (0.0 when there are no participants at all)
ys = [round(y / total_unique_people * 100, 1) if total_unique_people else 0.0 for y in ys]

plt.figure(figsize=(5, 3))
bars = plt.bar(xs, ys, color=colors, alpha=0.8, edgecolor='white')
plt.xlabel('Times fell victim')
plt.ylabel('Percentage of participants (%)')
plt.xticks(xs)
plt.grid(axis='y', linestyle='--', alpha=0.3)
plt.ylim(0, 100)

# Annotate non-empty bars with their percentage
for rect, val in zip(bars, ys):
    if val > 0:
        plt.text(rect.get_x() + rect.get_width() / 2, rect.get_height(), f'{val}%', ha='center', va='bottom', fontsize=9)

plt.tight_layout()
plt.savefig('plots/victim_count_distribution.pdf')
plt.show()

In [None]:
# Per-group distribution of per-person successful phishing events (groups with >= 10 members)
# Here "group" is the record's 'group' field, not the e-mail domain.

# Build per-group membership and per-person success counts
group_members = defaultdict(set)            # group -> set of e-mail addresses
group_email_success = defaultdict(Counter)  # group -> e-mail -> number of successes
for r in report:
    grp = r.get('group', 'Unknown')
    email = (r.get('person') or {}).get('email')
    if not email:
        continue
    group_members[grp].add(email)
    if is_success(r):
        group_email_success[grp][email] += 1

# Keep only groups with at least 10 unique members
eligible_groups = [g for g, members in group_members.items() if len(members) >= 10]

# Build histograms (k -> number of people) for each eligible group
hists = {}
max_k = 0  # largest per-person success count over all eligible groups
for g in eligible_groups:
    hist = Counter(group_email_success[g].values())  # counts per person (exclude zeros)
    if hist:
        max_k = max(max_k, max(hist.keys()))
    hists[g] = hist

# Shared x-axis 1..max_k; people with zero successes are not plotted
xs = list(range(1, max_k + 1))

# Sort groups by size (desc) for plotting order
eligible_groups.sort(key=lambda g: len(group_members[g]), reverse=True)

# Color cycle
# Colours repeat once there are more groups than entries in the colormap
cmap = plt.get_cmap('tab20')
colors = [cmap(i % cmap.N) for i in range(len(eligible_groups))]

plt.figure(figsize=(11, 6))
for idx, g in enumerate(eligible_groups):
    hist = hists.get(g, Counter())
    ys = [hist.get(k, 0) for k in xs]
    # Skip lines that are completely zero (no successes in that group)
    if all(v == 0 for v in ys):
        continue
    plt.plot(xs, ys, marker='o', linewidth=2, alpha=0.7, color=colors[idx],
                label=f"{g} (n={len(group_members[g])})")

plt.xlabel('Times fell victim')
plt.ylabel('Number of people in group')
plt.xticks(xs)
plt.grid(True, linestyle='--', alpha=0.3)
plt.legend(title='Group', bbox_to_anchor=(1.02, 1), loc='upper left')
plt.tight_layout()
plt.show()

In [None]:
# Per-template share of dangerous actions, in percent (rounded to one decimal).
# The two lists are paired element by element: sent count and dangerous-action count.
# NOTE(review): the values below look like placeholders - confirm before using the result.
email_template_sent = [ 1, 2, 3 ]
email_template_dangerous_action = [ 1, 2, 3 ]

[round(danger/total*100, 1) for total,danger in zip(email_template_sent, email_template_dangerous_action)]

In [None]:
# Effectiveness of emotional cues over time
# One line per property: monthly success rate among events where that property is True.

# Discover all property keys available in the dataset
prop_keys = set()
for r in report:
    props = r.get('properties') or {}
    prop_keys.update(props.keys())


# Aggregate per-month stats for each property (when property is True) in the selected year
# stats[property][month_index] -> {'succ': successes, 'tot': events with the property set}
stats = {k: [ {'succ': 0, 'tot': 0} for _ in range(12) ] for k in sorted(prop_keys)}
for r in report:
    dt = parse_dt(r.get('date', ''))
    if not dt or dt.year != year_to_plot:
        continue
    props = r.get('properties') or {}
    mi = dt.month - 1
    for k in prop_keys:
        if props.get(k):
            stats[k][mi]['tot'] += 1
            if is_success(r):
                stats[k][mi]['succ'] += 1

months = [calendar.month_abbr[m] for m in range(1, 13)]

# Order properties by total sample size (descending) for clearer legend priority
order = sorted(prop_keys, key=lambda k: sum(m['tot'] for m in stats[k]), reverse=True)

plt.figure(figsize=(12, 6))
for k in order:
    # NaN marks months in which the property never occurred
    rates = [ (m['succ'] / m['tot'] * 100.0) if m['tot'] else float('nan') for m in stats[k] ]
    if all(math.isnan(v) for v in rates):
        continue
    label = k.replace('_', ' ').title()
    plt.plot(months, rates, marker='o', linewidth=2, alpha=0.85, label=label)

plt.xlabel('Month')
plt.ylabel('Success rate when cue present (%)')
plt.ylim(0, 18)  # fixed y-range: rates above 18% fall outside the visible area
plt.grid(True, linestyle='--', alpha=0.3)
plt.legend(title='Cue', bbox_to_anchor=(1.02, 1), loc='upper left')
plt.tight_layout()
plt.show()

In [None]:
# New-employee share of successful phishing attempts over time
# A person counts as a "new employee" in the month of their first recorded event.

# First-ever phishing attempt (any outcome) per email across the whole dataset
events_by_email = defaultdict(list)  # email -> list of event datetimes
for r in report:
    email = (r.get('person') or {}).get('email')
    if not email:
        continue
    dt = parse_dt(r.get('date', ''))
    if not dt:
        continue
    events_by_email[email].append(dt)

first_event_month = {}  # email -> (year, month)
for email, dts in events_by_email.items():
    fd = min(dts)
    first_event_month[email] = (fd.year, fd.month)

# For the selected year, compute monthly totals of successes and successes from new employees
total_success = [0] * 12
new_success = [0] * 12
for r in report:
    dt = parse_dt(r.get('date', ''))
    if not dt or dt.year != year_to_plot:
        continue
    if not is_success(r):
        continue
    mi = dt.month - 1
    total_success[mi] += 1
    email = (r.get('person') or {}).get('email')
    if not email:
        continue
    fm = first_event_month.get(email)
    # Count as new employee only if their first-ever attempt is this month (excluding January)
    if fm == (year_to_plot, dt.month) and dt.month != 1:
        new_success[mi] += 1

# Compute percentages; mark January as N/A
# Months without any success are NaN as well.
pct_new = [ (new_success[i] / total_success[i] * 100.0) if total_success[i] else float('nan') for i in range(12) ]
pct_new[0] = float('nan')  # January does not count

months = [calendar.month_abbr[m] for m in range(1, 13)]

# Overall Feb–Dec average reference line (if data exists)
# This is the pooled ratio over Feb-Dec, not the mean of the monthly percentages.
den = sum(total_success[1:])
overall_avg = (sum(new_success[1:]) / den * 100.0) if den else float('nan')

plt.figure(figsize=(10, 6))
bars = plt.bar(months, pct_new, color='tab:purple', alpha=0.85, label='New-employee share of successes')
if not math.isnan(overall_avg):
    plt.axhline(overall_avg, color='tab:orange', linestyle='--', linewidth=2, label=f'Feb-Dec avg = {overall_avg:.1f}%')

plt.xlabel('Month')
plt.ylabel('Percent of monthly successes from new employees (%)')
plt.ylim(0, 100)
plt.grid(axis='y', linestyle='--', alpha=0.3)

# Annotate bars
# January gets an 'N/A' label; every other month with a value gets its percentage.
y_top = 100
for i, (m, p) in enumerate(zip(months, pct_new)):
    if i == 0:  # January
        plt.text(i, y_top * 0.05, 'N/A', ha='center', va='bottom', fontsize=9, color='gray')
    elif not math.isnan(p):
        plt.text(i, p, f'{p:.1f}%', ha='center', va='bottom', fontsize=9)

plt.legend(loc='upper right')
plt.tight_layout()
plt.show()

In [None]:
# Calculate effectiveness of emotional cues on success rate with significance
# For each cue, correlates the 0/1 cue flag with the 0/1 success flag over all
# events of the selected year and prints a summary table.

# Emotional cue keys: prefer a known set, fall back to those present in data
preferred_keys = ['personalized','urgent','authority','internal','fear','curiosity','help','reward']
present_keys = set()
for r in report:
    props = r.get('properties') or {}
    present_keys.update(props.keys())
# Known cues first (in the preferred order), then any other keys alphabetically
keys = [k for k in preferred_keys if k in present_keys] + [k for k in sorted(present_keys) if k not in preferred_keys]

# Collect success (y) and per-cue flags (X) for the selected year
y = []  # 1=success, 0=not success
X = {k: [] for k in keys}
n_events = 0
for r in report:
    dt = parse_dt(r.get('date', ''))
    if not dt or dt.year != year_to_plot:
        continue
    n_events += 1
    y.append(1 if is_success(r) else 0)
    props = r.get('properties') or {}
    for k in keys:
        X[k].append(1 if props.get(k, False) else 0)

y = np.asarray(y, dtype=float)
# Without events there is nothing to correlate: report it and stop this cell
if y.size == 0:
    print(f'No events available for year {year_to_plot}.')
    raise SystemExit

# Spearman correlation between success and each cue
results = []  # (cue, rho, p, n_true, n_false)
for k in keys:
    x = np.asarray(X[k], dtype=float)
    n_true = int(np.sum(x == 1))
    n_false = int(np.sum(x == 0))
    # A constant cue or constant outcome is reported as rho = NaN with p = 1.0
    if np.std(x) == 0 or np.std(y) == 0:
        rho, p = np.nan, 1.0
    else:
        rho, p = spearmanr(x, y, nan_policy='omit')
    results.append((k, rho, p, n_true, n_false))

# Sort by absolute rho (descending)
# NaN correlations are treated as 0 for sorting.
results.sort(key=lambda t: (0 if np.isnan(t[1]) else abs(t[1])), reverse=True)

# Print summary table
print(f'Spearman correlation between success and emotional cues (year {year_to_plot})')
print(f'Events: {n_events}, successes: {int(y.sum())}, non-successes: {int(len(y) - y.sum())}')
print(f"{'Cue':<18} {'rho':>7} {'p-value':>10} {'nTrue':>7} {'nFalse':>7}")
for k, rho, p, n_t, n_f in results:
    lab = k.replace('_', ' ').title()
    rhos = 'nan' if np.isnan(rho) else f'{rho: .3f}'
    print(f"{lab:<18} {rhos:>7} {p:>10.3g} {n_t:>7} {n_f:>7}")

In [None]:
# Visualize emotional cue effects on success rate with significance
# Dot plot: one dot per cue, x = rho * 100, face colour = -log10(p), edge colour = sign of rho.
# The figure is saved to plots/emotional_cue_effects.pdf.

# Results from previous Spearman correlation
# The numbers are hard-coded, so they do not update when the data changes.
# All three lists are reversed with [::-1] and must stay aligned.
display_labels = [
    'Personalization', 'Urgency', 'Authority', 'Internal Sender', 'Fear', 'Curiosity', 'Help', 'Reward'
][::-1]
rho_values = [ 0.046, 0.002, -0.003, 0.060, 0.020, 0.042, 0.070, 0.012 ][::-1]
# NOTE(review): these are correlation coefficients scaled by 100 - confirm that
# labelling them as "percentage points" is intended.
rho_values = [r * 100.0 for r in rho_values]  # convert to percentage points
p_values = [ 5.78e-08, 0.822, 0.76, 2.37e-12, 0.0178, 0.00508, 1.55e-16, 0.155 ][::-1]

# y positions of the dots (this rebinds the name `y` to a plain list)
y = list(range(len(display_labels)))

# Visualize statistical significance with color (higher -log10(p) = more significant)
# Keep sign of effect as the marker edge color (green=positive, red=negative)
sign_edge_colors = ['tab:green' if x >= 0 else 'tab:red' for x in rho_values]
sig = -np.log10(np.clip(p_values, 1e-300, 1.0))  # avoid log(0)
vmax = float(np.max(sig)) if len(sig) else 1.0
norm = Normalize(vmin=0, vmax=vmax)
cmap = plt.cm.viridis

plt.figure(figsize=(5, max(4, 0.35*len(display_labels) + 1)))
ax = plt.gca()
ax.axvline(0, color='black', linewidth=1, alpha=0.7)  # zero-effect reference line

# Draw dots with facecolor mapped to significance and edgecolor mapped to sign
sc = ax.scatter(rho_values, y, c=sig, cmap=cmap, norm=norm, s=90, zorder=3,
                edgecolors=sign_edge_colors, linewidths=1.5)

ax.set_xlim(-2, 10)  # fixed x-range: values outside [-2, 10] are not visible
ax.set_yticks(y)
ax.set_yticklabels(display_labels)
ax.set_xlabel('Effect on success (percentage points)')
ax.grid(axis='x', linestyle='--', alpha=0.3)

# Add colorbar with p-value reference ticks
mappable = plt.cm.ScalarMappable(norm=norm, cmap=cmap)
mappable.set_array([])
cbar = plt.colorbar(mappable, ax=ax, pad=0.02)
cbar.set_label('Significance (-log10 p)')
# Show common thresholds (only those within range)
# `ticks` and `tick_labels` are built by separate conditions and must end up the same length.
threshold_ps = [0.05, 0.01, 0.001]
threshold_ticks = [max(0.0, -math.log10(p)) for p in threshold_ps if -math.log10(p) <= vmax + 1e-9]
ticks = [0.0] + threshold_ticks + ([vmax] if vmax > 0 and abs(vmax - threshold_ticks[-1] if threshold_ticks else vmax) > 1e-9 else [])
cbar.set_ticks(ticks)
tick_labels = ['1.0'] + [f'{p:g}' for p in threshold_ps if -math.log10(p) <= vmax + 1e-9] + ([f'{10**(-vmax):.1e}'] if vmax not in ([0.0] + threshold_ticks) and vmax > 0 else [])
cbar.set_ticklabels(tick_labels)

# Annotate each dot with its effect value (no stars)
# The label sits on the outer side of the dot: right of positive values, left of negative ones.
for xx, yy in zip(rho_values, y):
    ha = 'left' if xx >= 0 else 'right'
    offset = 0.15 if xx >= 0 else -0.15
    ax.text(xx + offset, yy - 0.2, f'{xx:.1f} pp', va='center', ha=ha, fontsize=8)

plt.tight_layout()
plt.savefig('plots/emotional_cue_effects.pdf')
plt.show()