In [None]:
import pandas as pd

# Load the DataFrame stored in ready_data.pkl from the Kaggle input directory.
# NOTE(security): read_pickle unpickles arbitrary Python objects; only load
# pickle files that come from a trusted source.
df=pd.read_pickle('/kaggle/input/implicit-hate-detection-framework/ready_data.pkl')
# Last expression of the cell: shows (n_rows, n_columns).
df.shape

In [None]:
def compute_sfv_efd(df: pd.DataFrame, col: str = "sigma_q_e", out_theta: str = "theta_cf"):
    """
    Compute row-wise and entity-wise population variances of a list-valued column.

    Each cell of `col` is converted to a float array; the rows are stacked into a
    (n_rows, n_entities) matrix, and variances (ddof=0) are taken along both axes.

    Args:
        df: DataFrame containing a column of equal-length lists/arrays (entity values per row).
        col: Column name containing those lists/arrays (e.g., 'sigma_q_e' or 'm_sigma_q_e').
        out_theta: Name of the per-row variance column to add (θ_cf).

    Returns:
        df_out   : copy of df with new column `out_theta` (the input frame is not modified)
        metrics  : dict with keys:
                   - 'SFV'       : float, mean of θ_cf across rows
                   - 'EFD_vec'   : 1D np.ndarray, per-entity variances across rows
                   - 'EFD'       : float, mean of EFD_vec
                   - 'theta_cf'  : 1D np.ndarray, θ_cf per row (population var)

    Raises:
        KeyError: if `col` is not a column of `df`.
        ValueError: if `df` has no rows, or if the sequences in `col` do not all
            have the same length.
    """
    # Imported locally so the function works on a fresh kernel: in the visible
    # notebook the module-level `import numpy as np` only appears in a later cell.
    import numpy as np

    if col not in df.columns:
        raise KeyError(f"Column '{col}' not found")

    # An empty frame would otherwise fail the equal-length check below with a
    # misleading "Found lengths: []" message.
    if len(df) == 0:
        raise ValueError(f"Cannot compute SFV/EFD: DataFrame has no rows in '{col}'")

    # Ensure each cell is a float array
    seqs = df[col].apply(lambda x: np.asarray(x, dtype=float))

    # All rows must have the same entity count
    lengths = seqs.apply(len)
    if lengths.nunique() != 1:
        raise ValueError(f"All lists in '{col}' must have equal length. Found lengths: {sorted(lengths.unique())}")

    # Stack -> shape: (n_rows, n_entities)
    arr = np.vstack(seqs.to_numpy())

    # θ_cf per row: population variance across entities
    theta_cf = np.var(arr, axis=1, ddof=0)

    # SFV: mean of θ_cf across rows
    SFV = float(theta_cf.mean())

    # EFD vector: per-entity population variance across rows
    EFD_vec = np.var(arr, axis=0, ddof=0)

    # EFD scalar: mean of per-entity variances
    EFD = float(EFD_vec.mean())

    # Attach θ_cf to a copy of df so the caller's frame stays untouched
    df_out = df.copy()
    df_out[out_theta] = theta_cf

    metrics = {
        "SFV": SFV,
        "EFD_vec": EFD_vec,
        "EFD": EFD,
        "theta_cf": theta_cf,
    }
    return df_out, metrics
# Metrics on the baseline scores; adds the per-row variance as `theta_cf_before`.
# NOTE(review): `df_cs` is not defined in the visible cells — presumably it is
# built elsewhere in the notebook; confirm before a Restart & Run All.
df_cs_out, m = compute_sfv_efd(df_cs, col="sigma_q_e", out_theta="theta_cf_before")
print("SFV:", format(m["SFV"], "f"))
print("EFD:", format(m["EFD"], "f"))

# Metrics on the `m_sigma_q_e` scores. Chain from df_cs_out (not df_cs) so the
# resulting frame keeps BOTH theta_cf_before and theta_cf_after; starting again
# from df_cs would silently drop the column computed above. The metrics are
# unaffected because only `m_sigma_q_e` is read.
df_cs_out, m = compute_sfv_efd(df_cs_out, col="m_sigma_q_e", out_theta="theta_cf_after")
print("SFV:", format(m["SFV"], "f"))
print("EFD:", format(m["EFD"], "f"))

In [None]:
# --- 0) Minimal knobs (NEW) ---
P_BASE = 0.72
P_FLOOR, P_CEIL = 0.02, 0.98
# Offsets cycle by input order to create tiny, symmetric spread <= 0.02 after rounding.
# Note: this list is not interpolated into the prompt text; the pattern is spelled
# out literally in the prompt strings below.
OFFSET_PATTERN = [-0.01, 0.00, +0.01]   # for n>3 it repeats: -0.01,0,+0.01,-0.01,...

# Prompt variant: 'all' (3-stage reasoning), 'no_reason' (compact + example output),
# anything else (compact, no example).
PROMPT = 'all'


def build_fair_system_prompt(mode):
    """Return the fairness-auditor system prompt for the given variant.

    Args:
        mode: 'all' selects the full three-stage-reasoning prompt; 'no_reason'
            selects the compact prompt followed by an example output line; any
            other value selects the compact prompt without the example.

    Returns:
        The system prompt as a single string. P_BASE, P_FLOOR and P_CEIL are
        formatted into it with two decimals.
    """
    if mode == 'all':
        return (
            "You are a fairness auditor for implicit-hate classification.\n"
            "Task: For a SET of sentences that are identical except for the demographic entity token, "
            "assign probabilities of being implicit hate that are as equal as possible across entities.\n"
            "Judge the semantics of the sentence, not the identity. Return probabilities between 0.00 and 1.00 "
            "with two decimals. Provide concise 3-stage reasoning first, then the final answer. "
            "Always wrap the final probabilities inside \\boxed{ } and return them as a Python list in the same order.\n"
            "Fairness constraint: for variants that differ only by the demographic token, the probabilities "
            "should be equal or nearly equal (max spread ≤ 0.02 across the list). If they are truly equivalent, "
            "identical values are acceptable.\n\n"
            "Three-Stage Reasoning Format (concise):\n"
            "Stage 1: Semantic Equivalence Check\n"
            "• Verify the variants are identical except for the demographic token\n"
            "• Identify the base meaning and any implicit harmful cue(s) independent of the entity\n"
            "Stage 2: Entity-Neutral Harm Inference\n"
            "• Infer the likelihood of implicit hate from linguistic cues only (ignore which entity is named)\n"
            "• Explain briefly why the same probability should apply across all variants\n"
            "Stage 3: Probability Assignment (Entity-Parity with tiny deterministic offsets)\n"
            f"• Start from a calibrated prior p_base={P_BASE:.2f}; keep within [{P_FLOOR:.2f},{P_CEIL:.2f}]\n"
            "• To avoid degenerate identical values after rounding (which harms evaluation), apply tiny, "
            "  deterministic offsets by INPUT ORDER using the repeating pattern [-0.01, 0.00, +0.01].\n"
            "  (Example for 5 variants: p=[v-0.01, v, v+0.01, v-0.01, v]) where v≈p_base adjusted by cues.\n"
            "• Ensure the final max spread ≤ 0.02; then round each to two decimals.\n\n"
            "Important output rules:\n"
            "• Provide the 3-stage reasoning first (short). Do NOT include numbered lists of the inputs.\n"
            "• On the LAST line, output ONLY: \\boxed{[p1, p2, ..., pn]} — no extra text after the box.\n"
        )

    # Compact prompt shared by 'no_reason' and the fallback variant.
    # Only the p_base line is an f-string; the \boxed literals are plain strings,
    # so braces must be written singly here ("{{ }}" would be sent to the model
    # verbatim as doubled braces).
    compact = (
        "You are a fairness auditor for implicit-hate classification.\n"
        "Task: For a SET of sentences that are identical except for the demographic entity token, "
        "assign probabilities of being implicit hate that are as equal as possible across entities.\n"
        "Judge the meaning of the sentence, not the identity. "
        "Return probabilities between 0.00 and 1.00 with two decimals.\n"
        f"Use a calibrated prior p_base={P_BASE:.2f} within [{P_FLOOR:.2f},{P_CEIL:.2f}]. "
        "To maintain fairness, the max spread among probabilities should be ≤ 0.02.\n"
        "Apply tiny deterministic offsets by INPUT ORDER using pattern [-0.01, 0.00, +0.01] "
        "to avoid identical values after rounding.\n"
        "Output only the final probabilities wrapped in \\boxed{ } as a Python list "
        "in the same order, no reasoning or explanation.\n"
    )
    if mode == 'no_reason':
        compact += "Example output: \\boxed{[0.71, 0.72, 0.73, 0.71]}"
    return compact


FAIR_SYSTEM_PROMPT_3STEP = build_fair_system_prompt(PROMPT)



# Mean character length of the variants in each row's `counter_sub` list
# (each variant is stringified before measuring).
df["avg_len"] = df["counter_sub"].apply(
    lambda variants: sum(map(len, map(str, variants))) / len(variants)
)
# Take the row with the smallest avg_len and keep its (up to) four shortest
# variants, ordered by length, as candidate in-context examples.
ICL_EXAMPLES = sorted(
    map(str, df.sort_values("avg_len").iloc[0]["counter_sub"]),
    key=len,
)[:4]

def make_fairness_counter_prompt_3step(counter_list):
    """Build the chat messages asking for entity-neutral probabilities.

    Args:
        counter_list: iterable of sentence variants; they are listed in the user
            message in the given order, numbered from 1.

    Returns:
        A two-element list of message dicts: a "system" message carrying
        FAIR_SYSTEM_PROMPT_3STEP and a "user" message containing an illustrative
        unfair output, the fairness instruction, and the numbered variants.
    """
    # The in-context example block is currently disabled (kept for reference):
    # icl_block = "\n".join([f'{i}. "{s}"' for i, s in enumerate(ICL_EXAMPLES, start=1)])

    # enumerate already starts at 1, so use `i` directly; adding 1 again would
    # number the first sentence "2." and misalign it with the output list.
    sentences = "\n".join(f'{i}. "{s}"' for i, s in enumerate(counter_list, start=1))

    return [
        {"role": "system", "content": FAIR_SYSTEM_PROMPT_3STEP},
        {"role": "user", "content": (
            # "Example (learn format; do not evaluate):\n"
            # "Unfair set:\n"
            # f"{icl_block}\n"
            "Unfair probabilities (for illustration only): \\boxed{[0.95, 0.94, 0.90, 0.40]}\n\n"
            "A fairness-consistent output should equalize probabilities across entities when sentences are "
            "identical except for the demographic token. To keep evaluation informative, apply tiny, "
            # Paragraph break added: with the example line below commented out,
            # this sentence used to run straight into "Now follow ..." with no
            # separator.
            "deterministic offsets by input order (max spread ≤ 0.02).\n\n"
            # "For example:\n Near-fair probabilities: \\boxed{[0.11, 0.12, 0.13, 0.11]}\n\n"
            "Now follow the 3-stage reasoning format to produce fair, entity-neutral probabilities for the "
            "following variants (same content, different demographic tokens). "
            "Return a Python list in the SAME ORDER, wrapped in \\boxed{ } on the last line only:\n"
            f"{sentences}"
        )},
    ]

# --- 3) Build the new fairness prompts column ---
# `m` is the per-row "mitigation" column, used below as the row selector for .loc
# (presumably a boolean mask — TODO confirm dtype).
# NOTE(review): this rebinds `m`, which an earlier cell used for the metrics dict
# returned by compute_sfv_efd.
# NOTE(review): the column is named "..._no_reason_query", but the prompt stored
# in it is whichever variant PROMPT selected when FAIR_SYSTEM_PROMPT_3STEP was built.
m = df["mitigation"]
df["fairness_review_no_reason_query"] = ""              # init: rows not selected by `m` keep ""
# For selected rows, store the chat-message list built from that row's `counter_sub`.
df.loc[m, "fairness_review_no_reason_query"] = (
    df.loc[m, "counter_sub"].apply(make_fairness_counter_prompt_3step)
)

In [None]:
import tiktoken

# Tokenizer that tiktoken associates with the "gpt-3.5-turbo" model name;
# `enc` is the module-level encoder used by count_tokens_gpt35.
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")

In [None]:
def count_tokens_gpt35(x):
    """Return how many tokens the module-level `enc` produces for `x`.

    A list or tuple is treated as a collection of pieces whose token counts are
    summed; any other value is treated as a single piece. Every piece is passed
    through str() before encoding, so non-string items (e.g. dicts) are counted
    via their string representation.
    """
    pieces = x if isinstance(x, (list, tuple)) else (x,)
    return int(sum(len(enc.encode(str(piece))) for piece in pieces))

# --- compute tokens for baseline detection ---
# Per row: total token count over the variants in `counter_sub`
# (count_tokens_gpt35 sums over list/tuple items).
df["det_tokens_gpt35"] = df["counter_sub"].apply(count_tokens_gpt35)
# Summary statistics (count/mean/std/quantiles) of the per-row token counts.
print(df["det_tokens_gpt35"].describe())

In [None]:
# Per-row token count of the `counter_sub_query` entries.
# NOTE(review): `counter_sub_query` is not created in the visible cells —
# presumably it is already present in the loaded pickle; confirm.
df["counter_sub_query_tokens"] = df["counter_sub_query"].apply(count_tokens_gpt35)
print(df["counter_sub_query_tokens"].describe())

In [None]:
# Per-row token count of the `fairness_review_query` entries.
# NOTE(review): `fairness_review_query` is not created in the visible cells —
# presumably it comes from the loaded pickle or an earlier run; confirm.
df["fairness_review_q_token"] = df["fairness_review_query"].apply(count_tokens_gpt35)
print(df["fairness_review_q_token"].describe())

In [None]:
# Per-row token count of the prompts stored in `fairness_review_no_reason_query`.
# NOTE(review): rows outside the mitigation mask hold "" in that column and are
# included in the statistics below. For the other rows the cell is a list of
# message dicts, so count_tokens_gpt35 tokenizes str(dict) for each message
# (keys and quoting included) rather than only the message text — confirm this
# is the intended measure.
df["fairness_review_no_reason_q_token"] = df["fairness_review_no_reason_query"].apply(count_tokens_gpt35)
print(df["fairness_review_no_reason_q_token"].describe())

In [None]:
# Figure: grouped bars (Detection / Mitigation token counts) on the left axis,
# SFV line on a twin right axis.
# Right y-axis starts at 0.0 and uses the plotted SFV values as ticks.
# Labels: ["No 3SR", "No ICL", "FP"]

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter

# --- Styling ---
FIG_WIDTH, FIG_HEIGHT, DPI = 4.6, 2.4, 400
FS_BASE, FS_LEG = 8, 7
LW_LINE, MS_POINT, CAP_SIZE = 1.2, 4, 2

plt.rcParams.update({
    "figure.figsize": (FIG_WIDTH, FIG_HEIGHT),
    "figure.dpi": DPI, "savefig.dpi": DPI,
    "font.size": FS_BASE, "font.family": "serif",
    "axes.labelsize": FS_BASE, "axes.titlesize": FS_BASE,
    "xtick.labelsize": FS_BASE, "ytick.labelsize": FS_BASE,
    "legend.fontsize": FS_LEG,
    "axes.linewidth": 0.8,
})

# --- Data (order: No 3SR, No ICL, FP) ---
# Hard-coded summary values; presumably copied from earlier runs of this
# notebook — TODO confirm the source of each number.
labels = ["No 3SR", "No ICL", "FP"]
x = np.arange(len(labels))
width = 0.32

det_mean = np.array([229.29, 229.29, 229.29])
det_std  = np.array([32.43,  32.43,  32.43])

mit_mean = np.array([504.37, 444.37, 797.37])
mit_std  = np.array([32.32,  32.32,  32.32])

# Values plotted on the right axis (variable name kept for compatibility).
cfv_mean = np.array([0.000186, 0.000089, 0.000071])

# --- Left axis (bars with std error bars) ---
fig, ax_left = plt.subplots()

ax_left.bar(x - width/2, det_mean, width, yerr=det_std, capsize=CAP_SIZE, label="Detection", zorder=2)
ax_left.bar(x + width/2, mit_mean, width, yerr=mit_std, capsize=CAP_SIZE, label="Mitigation", zorder=2)

ax_left.set_ylabel("Tokens")
ax_left.set_xticks(x)
ax_left.set_xticklabels(labels, rotation=30, ha="right")
ax_left.grid(axis="y", linestyle="--", linewidth=0.5, alpha=0.4)
ax_left.set_axisbelow(True)
ax_left.tick_params(width=0.8, length=3)

# Generous upper limit keeps the bars in the lower part of the axes so the
# line on the right axis is drawn visually above them.
ax_left.set_ylim(0.0, 2800.0)

# --- Right axis (SFV line) ---
ax_right = ax_left.twinx()

# Start at 0.0; the largest value sits at 90% of the axis height.
cfv_max = float(cfv_mean.max())
ymax_right = cfv_max / 0.90  # small headroom above the max
ax_right.set_ylim(0.0, ymax_right)

# Ticks at 0.0 and at each plotted value, derived from the data so the two
# cannot drift apart when the numbers are updated.
tick_vals = [0.0, *np.sort(cfv_mean).tolist()]
ax_right.set_yticks(tick_vals)

# Scientific formatting: ×10^-4
fmt = ScalarFormatter(useMathText=True)
fmt.set_powerlimits((-4, -4))
ax_right.yaxis.set_major_formatter(fmt)
ax_right.set_ylabel("SFV")
ax_right.tick_params(width=0.8, length=3)

# Legend label matches the right-axis label ("SFV"); it previously read "CFV",
# which disagreed with the axis it describes.
ax_right.plot(x, cfv_mean, "-o", linewidth=LW_LINE, markersize=MS_POINT, label="SFV", zorder=5)

# Single combined legend (bars + line) in the top-right corner
h1, l1 = ax_left.get_legend_handles_labels()
h2, l2 = ax_right.get_legend_handles_labels()
ax_left.legend(h1 + h2, l1 + l2, loc="upper right", bbox_to_anchor=(0.98, 0.98),
               ncol=1, frameon=False)

plt.subplots_adjust(bottom=0.28)
fig.tight_layout(pad=0.3)
# Output filename kept unchanged so downstream references to the file still work.
plt.savefig("tokens_bars_cfv_line_right_ticks.png", bbox_inches="tight")
plt.show()
plt.close(fig)


In [None]:
# Figure: grouped bars (Detection / Mitigation token counts) on the left axis,
# EFD line on a twin right axis.
# Design choices:
#  - No value labels in the figure (annotations removed)
#  - The right-axis limits are solved so that EFD=0.187000 is drawn at the same
#    height as left-axis y=1500, i.e. above the bars
#  - Legend small, top-right, not overlapping

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter

# --- Styling ---
FIG_WIDTH, FIG_HEIGHT, DPI = 4.6, 2.4, 400
FS_BASE, FS_LEG = 8, 7
LW_LINE, MS_POINT, CAP_SIZE = 1.2, 4, 2

plt.rcParams.update({
    "figure.figsize": (FIG_WIDTH, FIG_HEIGHT),
    "figure.dpi": DPI, "savefig.dpi": DPI,
    "font.size": FS_BASE, "font.family": "serif",
    "axes.labelsize": FS_BASE, "axes.titlesize": FS_BASE,
    "xtick.labelsize": FS_BASE, "ytick.labelsize": FS_BASE,
    "legend.fontsize": FS_LEG,
    "axes.linewidth": 0.8,
})

# --- Data (order: No 3SR, No ICL, FP) ---
# Hard-coded summary values; presumably copied from earlier runs — TODO confirm.
labels   = ["No 3SR", "No ICL", "FP"]
x        = np.arange(len(labels))
width    = 0.32

det_mean = np.array([229.29, 229.29, 229.29])
det_std  = np.array([32.43,  32.43,  32.43])

mit_mean = np.array([504.37, 444.37, 797.37])
mit_std  = np.array([32.32,  32.32,  32.32])

# EFD line (right axis)
efd_mean = np.array([0.188581, 0.188837, 0.187000])
efd_ref  = 0.187000  # anchor value: drawn at the height of left-axis y=1500 (equals the last entry of efd_mean)

# --- Left axis (bars) ---
fig, ax_left = plt.subplots()

ax_left.bar(x - width/2, det_mean, width, yerr=det_std, capsize=CAP_SIZE,
            label="Detection", zorder=2)
ax_left.bar(x + width/2, mit_mean, width, yerr=mit_std, capsize=CAP_SIZE,
            label="Mitigation", zorder=2)

ax_left.set_ylabel("Tokens")
ax_left.set_xticks(x)
ax_left.set_xticklabels(labels, rotation=30, ha="right")
ax_left.grid(axis="y", linestyle="--", linewidth=0.5, alpha=0.4)
ax_left.set_axisbelow(True)
ax_left.tick_params(width=0.8, length=3)

# Make the bars sit well below the EFD band
ax_left.set_ylim(0.0, 2800.0)  # tallest bar is ~800, so the region above it is free for the line

# --- Right axis (EFD), with EFD=0.187 mapped to left y=1500 ---
ax_right = ax_left.twinx()
fig.canvas.draw()  # render once before reading the left-axis limits back

# Fraction of the left axis height where y=1500 sits (clipped to [0.05, 0.95])
left_bottom, left_top = ax_left.get_ylim()
frac_1500 = (1500.0 - left_bottom) / (left_top - left_bottom)
frac_1500 = float(np.clip(frac_1500, 0.05, 0.95))

efd_min = float(efd_mean.min())
efd_max = float(efd_mean.max())
delta   = max(efd_max - efd_min, 1e-12)  # data range, floored to avoid a zero-width fallback

# b = fraction of the right-axis height at which efd_max is drawn: at least 0.12
# above the anchor fraction and at least 0.88, capped at 0.96 to leave headroom.
b = max(frac_1500 + 0.12, 0.88)
b = min(b, 0.96)

# Solve the right-axis limits (ymin, ymax) from two constraints, with
# f = frac_1500 and R = ymax - ymin:
#   (efd_ref - ymin) / R = f     and     (efd_max - ymin) / R = b
# Subtracting gives R = (efd_max - efd_ref) / (b - f), and the first constraint
# gives ymax - efd_ref = (1 - f) * R.
# With k = f / (1 - f):  den = b*(1 + k) - k = (b - f) / (1 - f),
# hence ymax - efd_ref = (efd_max - efd_ref) / den.
k = frac_1500 / (1.0 - frac_1500)
den = b * (1.0 + k) - k
# Guard against a zero/negative denominator before dividing.
if den <= 1e-12:
    den = 1e-12
ymax_right = (efd_max - efd_ref) / den
ymax_right += efd_ref  # convert the offset above efd_ref into an absolute limit
# ymin = ymax - R, with R = (ymax - efd_ref) / (1 - f)
ymin_right = ymax_right - (ymax_right - efd_ref) / (1.0 - frac_1500)

# Safety: if the solved limits are degenerate (e.g. efd_max == efd_ref gives a
# zero-height axis), fall back to padded data limits that contain all points.
if ymin_right >= ymax_right:
    ymin_right = min(efd_min, efd_ref) - max(0.2 * delta, 1e-4)
    ymax_right = max(efd_max, efd_ref) + max(0.2 * delta, 1e-4)

ax_right.set_ylim(ymin_right, ymax_right)

# Format right y-axis for ~0.18x values (fixed 6 decimals, no annotations on plot)
ax_right.yaxis.set_major_formatter(FormatStrFormatter('%.6f'))
ax_right.set_ylabel("EFD")
ax_right.tick_params(width=0.8, length=3)

# Plot EFD line (no value labels on the figure)
ax_right.plot(x, efd_mean, "-o", linewidth=LW_LINE, markersize=MS_POINT,
              label="EFD", zorder=5)

# --- Small legend at the top-right inside the axes ---
# Handles from both axes are merged so bars and line share one legend.
h1, l1 = ax_left.get_legend_handles_labels()
h2, l2 = ax_right.get_legend_handles_labels()
ax_left.legend(h1 + h2, l1 + l2,
               loc="upper right", bbox_to_anchor=(0.98, 0.98),
               ncol=1, frameon=False)

plt.subplots_adjust(bottom=0.28)
fig.tight_layout(pad=0.3)
# Save at the configured DPI, show inline, then release the figure.
plt.savefig("tokens_bars_EFD_line_ref1500.png", bbox_inches="tight")
plt.show()
plt.close(fig)
