<a href="https://colab.research.google.com/github/ChrisChukwunyereNwaiwu/AI-Dev-Research/blob/main/AIDev_RQ5_Analysis_clean_(1)_static6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports & config

In [None]:
# ---- ALL LIBRARIES AT THE TOP ----
import os, re, json, math, warnings, random
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import statsmodels.formula.api as smf
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_auc_score

import os, math, json, re, numpy as np, pandas as pd
from textwrap import fill
import datetime as dt

from datasets import load_dataset

# Notebook-wide configuration: seed, output folder, display width, warning policy.
RNG_SEED = 42  # default seed of the bootstrap helper defined further down

Path("results").mkdir(exist_ok=True)  # no-op when the folder already exists
pd.set_option("display.max_colwidth", 180)
# NOTE(review): this silences every warning category for the whole session.
warnings.filterwarnings("ignore")

In [None]:
!pip -q install python-docx

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/253.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━[0m [32m204.8/253.0 kB[0m [31m6.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h

Load AIDev (PR-only is fine)

In [None]:
# Fetch the "all_pull_request" configuration of AIDev and convert its train split to pandas.
ds_pr = load_dataset("hao-li/AIDev", name="all_pull_request")
pr_df = ds_pr["train"].to_pandas()
print("PRs:", pr_df.shape[0])


README.md: 0.00B [00:00, ?B/s]

all_pull_request.parquet:   0%|          | 0.00/331M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

PRs: 932791


**Core feature engineering (shared by 5a/5b/5c)**


Builds: `accepted`, `clarify_strict` (and `clarify_var`), `log_stars`, `pr_size_log1p`, `changed_files_log1p`, `language`, touched-path flags and text patterns (for 5a), and security flags (for 5c). Timestamps are optional and are parsed later, in the 5(c) cell.

In [None]:
# --- helpers (accepted, clarify, paths) ---
# Question / conditional words; clarify_strict_row requires one of them alongside a "?".
QWORDS = re.compile(
    r"\b(what|when|where|why|how|should|could|would|which|if)\b",
    flags=re.IGNORECASE,
)
# Markup that clean_text blanks out: fenced code, inline code spans, URLs.
code_block = re.compile(r"```.+?```", flags=re.DOTALL)
inline_code = re.compile(r"`[^`]+`")
url_pat = re.compile(r"https?://\S+")

def robust_accepted(df):
    """Derive a 0/1 "accepted" (merged) indicator from whichever merge columns exist.

    A row counts as accepted when ANY of these holds:
      * a boolean-like merge column holds a truthy token,
      * a merge-timestamp column holds a real (non-missing, non-placeholder) value,
      * a state/status column equals "merged" (case-insensitive).

    Parameters
    ----------
    df : pandas.DataFrame
        PR table; columns that are absent are simply skipped.

    Returns
    -------
    pandas.Series
        Integer Series (0/1) aligned to ``df.index``.
    """
    # "1.0" covers flag columns stored as float (e.g. because they contain NaN):
    # astype(str) renders them as "1.0", which the plain "1" token would miss.
    truthy = {"true", "1", "1.0", "yes", "y", "t"}
    # Stringified placeholders that must not be mistaken for a merge timestamp.
    missing_tokens = {"", "nan", "nat", "none", "null", "<na>"}

    acc = pd.Series(False, index=df.index)
    for col in ["merged", "is_merged", "merge", "merged_bool"]:
        if col in df.columns:
            acc |= df[col].astype(str).str.strip().str.lower().isin(truthy)
    for col in ["merged_at", "mergedAt", "merge_at", "merge_date"]:
        if col in df.columns:
            as_text = df[col].astype(str).str.strip().str.lower()
            acc |= df[col].notna() & ~as_text.isin(missing_tokens)
    for col in ["state", "status", "merge_state"]:
        if col in df.columns:
            acc |= df[col].astype(str).str.strip().str.lower().eq("merged")
    return acc.astype(int)

def clean_text(x:str)->str:
    """Replace fenced code, inline code spans and URLs with a space; non-strings give ""."""
    if not isinstance(x, str):
        return ""
    stripped = x
    # Order matters: fenced blocks are removed before single-backtick spans.
    for pattern in (code_block, inline_code, url_pat):
        stripped = pattern.sub(" ", stripped)
    return stripped

def clarify_strict_row(t,b):
    """Return 1 when the cleaned title/body contain a "?" AND a QWORDS match, else 0."""
    title, body = clean_text(t), clean_text(b)
    if "?" not in title and "?" not in body:
        return 0
    has_question_word = bool(QWORDS.search(title)) or bool(QWORDS.search(body))
    return int(has_question_word)

def parse_paths_cell(x):
    """Normalise one "changed files" cell into a list of path strings.

    Accepted shapes:
      * None / float NaN                     -> []
      * list, tuple or numpy array           -> each element stringified
      * JSON text of a list                  -> its elements, stringified
      * JSON text of a dict with "files"     -> that list, stringified
      * text delimited by ",", ";" or "\\n"  -> the non-empty, stripped pieces
      * any other non-empty text             -> a single-element list

    Parameters
    ----------
    x : object
        Raw cell value.

    Returns
    -------
    list[str]
    """
    if x is None or (isinstance(x, float) and math.isnan(x)):
        return []
    # Array-likes must be expanded element-wise; str() of a numpy array looks like
    # "['a' 'b']", which would otherwise end up as one bogus path.
    if isinstance(x, np.ndarray):
        return [str(y) for y in x.ravel().tolist()]
    if isinstance(x, (list, tuple)):
        return [str(y) for y in x]

    s = str(x).strip()
    if not s:
        return []

    looks_like_json = (s.startswith("[") and s.endswith("]")) or (s.startswith("{") and s.endswith("}"))
    if looks_like_json:
        try:
            obj = json.loads(s)
        except (ValueError, RecursionError):
            # Not valid JSON after all: fall through to the delimiter heuristics.
            obj = None
        if isinstance(obj, list):
            return [str(y) for y in obj]
        if isinstance(obj, dict) and isinstance(obj.get("files"), list):
            return [str(y) for y in obj["files"]]

    if any(delim in s for delim in (",", ";", "\n")):
        return [p.strip() for p in re.split(r"[,\n;]+", s) if p.strip()]
    return [s]

def extract_paths_series(df):
    """Return a Series of per-row path lists, aligned to ``df.index``.

    The first column found among the known candidate names is parsed with
    ``parse_paths_cell``. When none exists, every row gets its own empty list.

    Parameters
    ----------
    df : pandas.DataFrame

    Returns
    -------
    pandas.Series
        Object Series whose values are lists of strings.
    """
    candidates = [
        "changed_files_list", "files_changed_list", "file_paths", "paths",
        "filenames", "files", "changed_files_details",
    ]
    for col in candidates:
        if col in df.columns:
            return df[col].apply(parse_paths_cell)
    # One fresh list per row: `[[]] * n` would put the SAME list object in every row,
    # so mutating one row's list would silently change all of them.
    return pd.Series([[] for _ in range(len(df))], index=df.index, dtype=object)

# --- build df ---
def _first_present(frame, names, fallback):
    """Return the first column of `frame` listed in `names`, else a constant Series of `fallback`."""
    for candidate in names:
        if candidate in frame.columns:
            return frame[candidate]
    return pd.Series(fallback, index=frame.index)


def _as_number(values):
    """Coerce to numeric; unparseable or missing entries become 0."""
    return pd.to_numeric(values, errors="coerce").fillna(0)


df = pr_df.copy()
for text_col in ("title", "body"):
    df[text_col] = _first_present(df, [text_col], "").fillna("").astype(str)
df["accepted"] = robust_accepted(df)

# controls
# NOTE(review): when the PR table has no stars/language column these controls are
# constant (0 / "Unknown"); a constant regressor is collinear with the intercept,
# which would be consistent with the "Singular matrix" logit failures — confirm.
df["stars"] = _as_number(_first_present(df, ["stars", "stargazers_count", "watchers"], 0))
df["log_stars"] = np.log1p(df["stars"])
df["additions"] = _as_number(_first_present(df, ["additions"], 0))
df["deletions"] = _as_number(_first_present(df, ["deletions"], 0))
df["pr_size_log1p"] = np.log1p(df["additions"] + df["deletions"])
df["changed_files_n"] = _as_number(_first_present(df, ["changed_files"], 0))
df["changed_files_log1p"] = np.log1p(df["changed_files_n"])
df["language"] = _first_present(df, ["language"], "Unknown").fillna("Unknown").astype(str)

# clarify vars
# One 0/1 clarification flag per PR, computed from its title and body.
df["clarify_strict"] = list(map(clarify_strict_row, df["title"], df["body"]))
# Use the strict flag only when it takes both values; otherwise fall back to "clarify".
# NOTE(review): no "clarify" column is created in this cell — confirm it exists
# before relying on the fallback name.
if df["clarify_strict"].nunique() == 2:
    clarify_var = "clarify_strict"
else:
    clarify_var = "clarify"

# touched paths + text patterns (5a)
paths_series = extract_paths_series(df)


def flag_paths(paths, rx):
    """Return 1 if any path in `paths` matches regex `rx` (case-insensitive search), else 0."""
    for p in paths:
        if re.search(rx, p, flags=re.I):
            return 1
    return 0


# Column name -> path regex. Insertion order fixes the order in which columns are added.
TOUCH_PATTERNS = {
    "touch_tests": r"(^|/)(test|tests|spec|__tests__)(/|$)",
    "touch_docs": r"(^|/)(docs?|doc|readme\.md)(/|$)|\.md$",
    "touch_src": r"(^|/)(src|lib)(/|$)",
    "touch_deps": r"(package(-lock)?\.json|yarn\.lock|pnpm-lock\.yaml|requirements\.txt|poetry\.lock|setup\.py|pyproject\.toml|pom\.xml|go\.mod|Cargo\.toml|build\.gradle(\.kts)?)",
}
for touch_col, touch_rx in TOUCH_PATTERNS.items():
    # Bind the regex as a default so each lambda keeps its own pattern.
    df[touch_col] = paths_series.apply(lambda ps, rx=touch_rx: flag_paths(ps, rx))

# Docs-only: documentation touched while neither source nor test paths were.
df["docs_only"] = (
    df["touch_docs"].eq(1) & df["touch_src"].eq(0) & df["touch_tests"].eq(0)
).astype(int)

# Text signals searched (case-insensitively) in the PR title and body; one 0/1 column each.
PATTERNS = {
    "pat_docs": r"\b(doc(s)?|readme)\b",
    "pat_build_ci": r"\b(ci|build|pipeline|workflow|github actions)\b",
    "pat_lint": r"\b(lint|format|prettier|flake8|eslint|black)\b",
    "pat_revert_hotfix": r"\b(revert|rollback|hotfix)\b",
    "pat_test_fail": r"\b(test(s)?\s*(is|are)?\s*failing|fail(ed|ure)|flake)\b",
}


def _mentions(pattern):
    """0/1 Series: does `pattern` occur in the title or the body of each PR in `df`?"""
    in_title = df["title"].str.contains(pattern, case=False, regex=True, na=False)
    in_body = df["body"].str.contains(pattern, case=False, regex=True, na=False)
    return (in_title | in_body).astype(int)


for name, rx in PATTERNS.items():
    df[name] = _mentions(rx)

# security (5c)
# Every alternative below is a complete word form. The earlier patterns wrapped bare
# stems in \b...\b, so "vulnerab" could never match "vulnerability", "encrypt" missed
# "encrypted"/"encryption", "auth" missed "authentication", and "bump" missed wording
# such as "Bumps x from 1 to 2". Groups are non-capturing, which also avoids the
# pandas "pattern has match groups" warning from str.contains.
# `auth` is expanded explicitly (not `auth\w*`) so that "author" is not flagged.
_SEC_TERMS = [
    r"security",
    r"vulnerab\w*",
    r"xss",
    r"csrf",
    r"rce",
    r"injections?",
    r"auth(?:n|z|enticat\w*|oriz\w*)?",
    r"encrypt\w*",
    r"tokens?",
    r"credentials?",
]
# CVE/CWE identifiers: the prefix followed directly by a word character, e.g. "CVE-2024-1234".
SEC_RX    = r"\b(?:" + "|".join(_SEC_TERMS) + r")\b|\b(?:cve|cwe)-\w"
BUMP_RX   = r"\b(?:bump(?:s|ed|ing)?|upgrad(?:e[sd]?|ing)|updat(?:e[sd]?|ing))\b"
SECRET_RX = r"\b(?:passwords?|api[-_ ]?keys?|secrets?|tokens?|access[-_ ]?keys?|private[-_ ]?keys?)\b"


def _title_or_body_matches(pattern):
    """Boolean Series: case-insensitive regex hit in the PR title or body of `df`."""
    in_title = df["title"].str.contains(pattern, case=False, regex=True, na=False)
    in_body = df["body"].str.contains(pattern, case=False, regex=True, na=False)
    return in_title | in_body


df["sec_text"]         = _title_or_body_matches(SEC_RX).astype(int)
# The bump wording is looked up in the title as well as the body, like the other flags;
# it only counts when a dependency manifest/lock file was touched.
df["sec_dep_bump"]     = (_title_or_body_matches(BUMP_RX) & (df["touch_deps"]==1)).astype(int)
df["sec_secret_terms"] = _title_or_body_matches(SECRET_RX).astype(int)
# Overall flag: any of the three signals fired.
df["security_flag"]    = ((df["sec_text"]==1) | (df["sec_dep_bump"]==1) | (df["sec_secret_terms"]==1)).astype(int)

print("Engineered columns ready.")

Engineered columns ready.


Utilities (bootstrap + small helpers)

In [None]:
def bootstrap_accept_gap(d, col, n_boot=1000, seed=RNG_SEED):
    """Bootstrap the acceptance-rate gap between rows with ``col == 1`` and ``col == 0``.

    Rows with a missing value in `col` or "accepted" are dropped. Each replicate
    resamples both groups with replacement (group sizes kept) and records
    mean(accepted | col==1) - mean(accepted | col==0).

    Returns
    -------
    (mean_gap, (lo, hi), n0, n1)
        Mean of the replicate gaps, the 2.5th/97.5th percentiles, and the two
        group sizes. The first three are NaN when either group is empty.
    """
    pair = d[[col, "accepted"]].dropna()
    flag = pair[col]
    d0 = pair.loc[flag == 0, "accepted"].to_numpy()
    d1 = pair.loc[flag == 1, "accepted"].to_numpy()
    n0, n1 = len(d0), len(d1)
    if n0 == 0 or n1 == 0:
        return np.nan, (np.nan, np.nan), n0, n1

    rng = np.random.default_rng(seed)
    gaps = np.empty(n_boot)
    for i in range(n_boot):
        # Draw order (flag=1 first, then flag=0) is part of the seeded stream.
        mean_flagged = rng.choice(d1, n1, True).mean()
        mean_unflagged = rng.choice(d0, n0, True).mean()
        gaps[i] = mean_flagged - mean_unflagged
    lo, hi = np.percentile(gaps, [2.5, 97.5])
    return float(gaps.mean()), (float(lo), float(hi)), n0, n1


Section A/B/C cells

 RQ 5(a): Failure patterns & touched paths

In [None]:
print("===== RQ 5(a): Failure patterns & touched paths =====")
path_cols = ["docs_only","touch_tests","touch_docs","touch_deps","touch_src"]
fail_cols = [c for c in df.columns if c.startswith("pat_")]

# Prevalence %
share_by_signal = pd.concat([df[fail_cols].mean(), df[path_cols].mean()])
prev_5a = (share_by_signal*100).sort_values(ascending=False).round(2)
print("Top prevalence signals:\n", prev_5a.head(10))

# Δ acceptance (pp)
def acc_delta(col):
    """Acceptance rate at col==1 minus col==0, in percentage points (NaN if a group is absent)."""
    rate = df.groupby(col)["accepted"].mean()
    with_flag, without_flag = rate.get(1,np.nan), rate.get(0,np.nan)
    return float((with_flag - without_flag)*100)

deltas_5a = pd.Series({c: acc_delta(c) for c in (fail_cols+path_cols)}).sort_values(ascending=False)
print("\nTop Δ acceptance signals (pp):\n", deltas_5a.head(10))

# Small per-signal logit (top 3 by prevalence)
binary_by_prevalence = [c for c in prev_5a.index if df[c].nunique()==2]
top3 = binary_by_prevalence[:3]
for s in top3:
    keep = ["accepted", s, clarify_var, "language", "log_stars", "pr_size_log1p"]
    d = df[keep].dropna().copy()
    # Both the outcome and the signal must vary for the fit to be attempted.
    if d["accepted"].nunique()!=2 or d[s].nunique()!=2:
        continue
    formula = f"accepted ~ {s} + {clarify_var} + log_stars + pr_size_log1p + C(language)"
    try:
        m = smf.logit(formula, data=d).fit(disp=False)
        print(f"\n[5(a)] Logit with {s}:\n", m.summary().tables[1])
    except Exception as e:
        # Best effort: report the failure and move on to the next signal.
        print(f"[5(a)] logit with {s} failed:", e)


===== RQ 5(a): Failure patterns & touched paths =====
Top prevalence signals:
 pat_docs             20.23
pat_build_ci         18.36
pat_lint             16.45
pat_test_fail         3.25
pat_revert_hotfix     0.58
docs_only             0.00
touch_tests           0.00
touch_docs            0.00
touch_deps            0.00
touch_src             0.00
dtype: float64

Top Δ acceptance signals (pp):
 pat_lint            -0.609041
pat_test_fail       -3.630618
pat_build_ci        -4.002290
pat_revert_hotfix   -6.219122
pat_docs            -7.009582
docs_only                 NaN
touch_tests               NaN
touch_docs                NaN
touch_deps                NaN
touch_src                 NaN
dtype: float64
[5(a)] logit with pat_docs failed: Singular matrix
[5(a)] logit with pat_build_ci failed: Singular matrix
[5(a)] logit with pat_lint failed: Singular matrix


RQ 5(b): Early signals (clarify) → acceptance

In [None]:
print("===== RQ 5(b): Early signals → acceptance =====")

def _acceptance_where(level):
    """Mean of `accepted` over rows whose clarify flag equals `level`; NaN when no such row exists."""
    rows = df[clarify_var]==level
    return df.loc[rows, "accepted"].mean() if rows.any() else np.nan

acc_overall = df["accepted"].mean()
acc0 = _acceptance_where(0)
acc1 = _acceptance_where(1)
gap_mean, (gap_lo, gap_hi), n0, n1 = bootstrap_accept_gap(df, clarify_var, n_boot=1000)

print(f"N={len(df):,} | overall acc={acc_overall*100:.2f}% | {clarify_var}=0: {acc0*100:.2f}% | {clarify_var}=1: {acc1*100:.2f}%")
print(f"Δ acc (pp)={(acc1-acc0)*100:.2f} | 95% CI [{gap_lo*100:.2f}, {gap_hi*100:.2f}]")

# Regularized LR with controls
keep = ["accepted", clarify_var, "language", "log_stars", "pr_size_log1p"]
d = df[keep].dropna().copy()
if d["accepted"].nunique()==2:
    y = d["accepted"]
    X = d[keep[1:]]  # every kept column except the outcome, in the same order
    pre = ColumnTransformer(
        transformers=[("lang", OneHotEncoder(handle_unknown="ignore"), ["language"])],
        remainder="passthrough",
    )
    # NOTE(review): class_weight="balanced" reweights the classes, so the predicted
    # probabilities (and Δprob below) describe the reweighted fit rather than the
    # observed acceptance base rate — confirm this is intended.
    lr = LogisticRegression(max_iter=700, solver="lbfgs", class_weight="balanced")
    clf = Pipeline(steps=[("pre", pre), ("lr", lr)])
    clf.fit(X,y)
    # Counterfactual copies: the cue forced on (X1) and off (X0) for every PR.
    X1 = X.copy(); X1[clarify_var] = 1
    X0 = X.copy(); X0[clarify_var] = 0
    p_cue_on = clf.predict_proba(X1)[:,1]
    p_cue_off = clf.predict_proba(X0)[:,1]
    me_c = p_cue_on.mean() - p_cue_off.mean()
    auc  = roc_auc_score(y, clf.predict_proba(X)[:,1])
    print(f"Regularized Δprob={me_c*100:.2f} pp | AUC={auc:.3f}")


===== RQ 5(b): Early signals → acceptance =====
N=932,791 | overall acc=84.71% | clarify_strict=0: 84.78% | clarify_strict=1: 70.24%
Δ acc (pp)=-14.54 | 95% CI [-15.80, -13.27]
Regularized Δprob=-20.61 pp | AUC=0.503


RQ 5(c): Security signals

In [None]:
print("===== RQ 5(c): Security signals =====")
has_sec_flag = "security_flag" in df.columns
sec_prev = df["security_flag"].mean()*100 if has_sec_flag else np.nan
print(f"security_flag prevalence: {sec_prev:.2f}%")

if has_sec_flag and df["security_flag"].nunique()==2:
    sec_acc = df.groupby("security_flag")["accepted"].mean()*100
    print("Acceptance by security_flag:\n", sec_acc.rename(index={0:"No",1:"Yes"}))

    # Optional time-to-merge (merged only, if timestamps exist)
    # Timestamp columns are converted in place on df (UTC; unparseable values become NaT).
    for tcol in ["created_at","merged_at","closed_at","updated_at"]:
        if tcol not in df.columns:
            continue
        df[tcol] = pd.to_datetime(df[tcol], errors="coerce", utc=True)
    if "created_at" in df.columns and "merged_at" in df.columns:
        d_m = df[df["accepted"]==1].copy()
        seconds_open = (d_m["merged_at"] - d_m["created_at"]).dt.total_seconds()
        ttm = seconds_open/(3600*24)  # days
        if ttm.notna().any():
            med0 = ttm[d_m["security_flag"]==0].median()
            med1 = ttm[d_m["security_flag"]==1].median()
            print(f"Median TTM (days): security={med1:.2f} vs non-security={med0:.2f}")

    # Focused logit
    keep = ["accepted","security_flag",clarify_var,"language","log_stars","pr_size_log1p"]
    d = df[keep].dropna().copy()
    if d["accepted"].nunique()==2:
        formula = f"accepted ~ security_flag + {clarify_var} + log_stars + pr_size_log1p + C(language)"
        try:
            m_sec = smf.logit(formula, data=d).fit(disp=False)
            print("\n[5(c)] security_flag model:\n", m_sec.summary().tables[1])
        except Exception as e:
            # Best effort: report the failure instead of aborting the cell.
            print("security_flag logit failed:", e)


===== RQ 5(c): Security signals =====
security_flag prevalence: 5.68%
Acceptance by security_flag:
 security_flag
No     85.189879
Yes    76.689814
Name: accepted, dtype: float64
Median TTM (days): security=0.00 vs non-security=0.00
security_flag logit failed: Singular matrix


Compile RQ 5(a), 5(b), and 5(c) into a one-page summary, plus an optional DOCX export

In [None]:


os.makedirs("results", exist_ok=True)

# ---------- small helpers ----------
def fmt_pct(x, digits=2):
    """Format a fraction as a percentage string with a "%" sign; None / NaN / inf give "n/a"."""
    if x is None:
        return "n/a"
    if isinstance(x, float) and not np.isfinite(x):
        return "n/a"
    return f"{x*100:.{digits}f}%"

def fmt_pp(x, digits=2):
    """Format a fraction as percentage points (no "%" sign); None / NaN / inf give "n/a"."""
    if x is None:
        return "n/a"
    if isinstance(x, float) and not np.isfinite(x):
        return "n/a"
    return f"{x*100:.{digits}f}"

def fmt_num(x):
    """Format `x` as an integer with thousands separators; "n/a" when it cannot be converted.

    Only conversion failures are handled (TypeError for None and similar, ValueError
    for NaN or non-numeric text, OverflowError for infinity), so KeyboardInterrupt
    and SystemExit are no longer swallowed as they were by the bare ``except``.
    """
    try:
        return f"{int(x):,}"
    except (TypeError, ValueError, OverflowError):
        return "n/a"

def bootstrap_accept_gap(d, col, n_boot=1000, seed=42):
    """Bootstrap mean(accepted | col==1) - mean(accepted | col==0).

    Rows with a missing `col` or "accepted" value are dropped; both groups are
    resampled with replacement at their own size for each of `n_boot` replicates.

    Returns (mean_gap, (lo, hi), n0, n1), where lo/hi are the 2.5th and 97.5th
    percentiles of the replicate gaps. The first three are NaN if a group is empty.
    """
    sub = d[[col, "accepted"]].dropna()
    outcomes = {level: sub.loc[sub[col]==level, "accepted"].to_numpy() for level in (0, 1)}
    d0, d1 = outcomes[0], outcomes[1]
    if len(d0)==0 or len(d1)==0:
        return np.nan, (np.nan, np.nan), len(d0), len(d1)

    rng = np.random.default_rng(seed)
    # Within each replicate the flag=1 group is drawn before the flag=0 group.
    gaps = np.array([
        rng.choice(d1, len(d1), True).mean() - rng.choice(d0, len(d0), True).mean()
        for _ in range(n_boot)
    ])
    lo, hi = np.percentile(gaps, [2.5, 97.5])
    return float(gaps.mean()), (float(lo), float(hi)), len(d0), len(d1)

# ---------- shared objects from earlier cells (rebuild if missing) ----------
assert "df" in globals(), "I need the main dataframe `df` from previous cells."

# Clarify column used throughout: the strict flag when it takes both values,
# else an existing "clarify" column, else None (the 5(b) numbers then stay "n/a").
if "clarify_strict" in df.columns and df["clarify_strict"].nunique()==2:
    clarify_col = "clarify_strict"
elif "clarify" in df.columns:
    clarify_col = "clarify"
else:
    clarify_col = None


def _numeric_or_zero(*names):
    """First existing column among `names`, coerced to numeric with missing -> 0.

    Falls back to an all-zero Series aligned to df.index. The previous scalar
    fallback (`df.get(name, 0)`) made `pd.to_numeric(...)` return a scalar, on
    which `.fillna` is not available.
    """
    for name in names:
        if name in df.columns:
            return pd.to_numeric(df[name], errors="coerce").fillna(0)
    return pd.Series(0, index=df.index)


# Controls (recompute if not present)
if "log_stars" not in df.columns:
    df["stars"] = _numeric_or_zero("stars", "stargazers_count", "watchers")
    df["log_stars"] = np.log1p(df["stars"])
if "pr_size_log1p" not in df.columns:
    df["additions"] = _numeric_or_zero("additions")
    df["deletions"] = _numeric_or_zero("deletions")
    df["pr_size_log1p"] = np.log1p(df["additions"] + df["deletions"])

# ---------- 5(b): Early signals → acceptance ----------
def _acceptance_at(level):
    """Mean acceptance where the clarify flag equals `level`; NaN without a clarify column or matching rows."""
    if not clarify_col:
        return np.nan
    rows = df[clarify_col]==level
    return float(df.loc[rows, "accepted"].mean()) if rows.any() else np.nan

acc_overall = float(df["accepted"].mean()) if df["accepted"].nunique()>0 else np.nan
acc0 = _acceptance_at(0)
acc1 = _acceptance_at(1)
if clarify_col:
    gap_mean, (gap_lo, gap_hi), n0, n1 = bootstrap_accept_gap(df, clarify_col, n_boot=1000)
else:
    gap_mean, (gap_lo, gap_hi), n0, n1 = (np.nan, (np.nan, np.nan), 0, 0)

# Try to fetch Option-C Δprob & AUC from earlier, else compute quickly
me_c_val = None
auc_val = None
try:
    from sklearn.preprocessing import OneHotEncoder
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import roc_auc_score
    keep = ["accepted", clarify_col, "language", "log_stars", "pr_size_log1p"]
    if clarify_col and all(c in df.columns for c in keep):
        dC = df[keep].dropna().copy()
        if dC["accepted"].nunique()==2:
            y = dC["accepted"]
            X = dC[keep[1:]]  # every kept column except the outcome, in the same order
            pre = ColumnTransformer(
                transformers=[("lang", OneHotEncoder(handle_unknown="ignore"), ["language"])],
                remainder="passthrough",
            )
            # NOTE(review): class_weight="balanced" reweights the classes, so the
            # predicted probabilities describe the reweighted fit rather than the
            # observed acceptance base rate — confirm this is intended.
            lr = LogisticRegression(max_iter=700, solver="lbfgs", class_weight="balanced")
            clf = Pipeline(steps=[("pre", pre), ("lr", lr)])
            clf.fit(X,y)
            # Counterfactual copies: the cue forced on (X1) and off (X0) for every PR.
            X1 = X.copy(); X1[clarify_col] = 1
            X0 = X.copy(); X0[clarify_col] = 0
            p_cue_on = clf.predict_proba(X1)[:,1]
            p_cue_off = clf.predict_proba(X0)[:,1]
            me_c_val = float(p_cue_on.mean() - p_cue_off.mean())
            auc_val  = float(roc_auc_score(y, clf.predict_proba(X)[:,1]))
except Exception as e:
    # Best effort: the summary text falls back to "n/a" for these two numbers.
    print("Option-C Δprob/AUC skipped:", e)

# ---------- 5(a): Failure patterns & touched paths ----------
fail_cols = [c for c in df.columns if c.startswith("pat_")]
path_cols = [c for c in ["docs_only","touch_tests","touch_docs","touch_deps","touch_src"] if c in df.columns]
have_signals = bool(fail_cols or path_cols)

# Prevalence of each signal, in percent, most common first.
if have_signals:
    share_by_signal = pd.concat([df[fail_cols].mean(), df[path_cols].mean()])
    prev_5a = (share_by_signal*100).sort_values(ascending=False).round(2)
else:
    prev_5a = pd.Series(dtype=float)

def acc_delta(col):
    """Acceptance rate at col==1 minus col==0, in percentage points (NaN if a group is absent)."""
    rate = df.groupby(col)["accepted"].mean()
    with_flag, without_flag = rate.get(1,np.nan), rate.get(0,np.nan)
    return float((with_flag - without_flag)*100)

if have_signals:
    deltas_5a = pd.Series({c: acc_delta(c) for c in (fail_cols+path_cols)}).sort_values(ascending=False)
else:
    deltas_5a = pd.Series(dtype=float)

top_prev_items = prev_5a.head(3).to_dict() if len(prev_5a) else {}
# Three largest acceptance gaps by magnitude, reported with their signed values.
if len(deltas_5a):
    strongest = deltas_5a.dropna().abs().sort_values(ascending=False).head(3).index
    top_delta_items = [(k, deltas_5a[k]) for k in strongest]
else:
    top_delta_items = []

# ---------- 5(c): Security ----------
has_security_flag = "security_flag" in df.columns
sec_prev = float(df["security_flag"].mean()*100) if has_security_flag else np.nan
sec_acc_no = sec_acc_yes = np.nan
if has_security_flag and df["security_flag"].nunique()==2:
    grp = df.groupby("security_flag")["accepted"].mean()*100
    sec_acc_no = float(grp.get(0,np.nan))
    sec_acc_yes = float(grp.get(1,np.nan))

# time-to-merge medians (merged only), if timestamps exist
ttm_line = ""
if {"created_at","merged_at"}.issubset(df.columns):
    d_merged = df[df["accepted"]==1].copy()
    # Parsed on the copy only; unparseable values become NaT.
    for ts_col in ("created_at", "merged_at"):
        d_merged[ts_col] = pd.to_datetime(d_merged[ts_col], errors="coerce", utc=True)
    ttm = (d_merged["merged_at"] - d_merged["created_at"]).dt.total_seconds()/(3600*24)
    flag_varies = "security_flag" in d_merged.columns and d_merged["security_flag"].nunique()==2
    if flag_varies and ttm.notna().any():
        med0 = float(ttm[d_merged["security_flag"]==0].median())
        med1 = float(ttm[d_merged["security_flag"]==1].median())
        # The sentence is emitted only when both medians are defined.
        if not np.isnan(med0) and not np.isnan(med1):
            ttm_line = f" Median time-to-merge among merged PRs is {med1:.2f} vs {med0:.2f} days (security vs. non-security)."

# ---------- compose text ----------
# Pre-format every number so the paragraphs below contain plain strings only.
N = fmt_num(len(df))
p_acc_overall = fmt_pct(acc_overall)
p_acc0, p_acc1 = fmt_pct(acc0), fmt_pct(acc1)
p_gap = fmt_pp(acc1-acc0) if (not np.isnan(acc1) and not np.isnan(acc0)) else "n/a"
p_gap_lo, p_gap_hi = fmt_pp(gap_lo), fmt_pp(gap_hi)
p_me_c = fmt_pp(me_c_val) if me_c_val is not None else "n/a"
p_auc  = f"{auc_val:.3f}" if (auc_val is not None and not np.isnan(auc_val)) else "n/a"

h = "# Results for AIDev RQ 5(a), 5(b), 5(c)\n\n"
# Timezone-aware "now": datetime.utcnow() is deprecated; the rendered stamp is unchanged.
stamp = f"_Generated {dt.datetime.now(dt.timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}_\n\n"

para_5b = (
    f"**RQ 5(b) — Early signals (clarification) and acceptance.** "
    f"Using the early textual cue **`{clarify_col}`**, we analyze **N={N}** agentic PRs. "
    f"Overall acceptance is **{p_acc_overall}**. PRs without the cue are accepted at **{p_acc0}**, "
    f"versus **{p_acc1}** with the cue — a raw difference of **{p_gap} pp** "
    f"(bootstrap 95% CI **[{p_gap_lo}, {p_gap_hi}]**). A regularized logistic model controlling for "
    f"repository popularity and PR size estimates an average change of **{p_me_c} pp** in acceptance "
    f"probability when toggling the cue (**AUC={p_auc}**)."
)

if top_prev_items:
    prev_bits = ", ".join([f"`{k}` ({v:.1f}%)" for k,v in top_prev_items.items()])
else:
    prev_bits = "no high-prevalence patterns in this slice"

if top_delta_items:
    delta_bits = ", ".join([f"`{k}`: {val:+.2f} pp" for k,val in top_delta_items])
else:
    delta_bits = "no patterns showed a stable acceptance difference"

para_5a = (
    f"**RQ 5(a) — Failure patterns and touched paths.** "
    f"We instrument failure/path signals from PR text and touched files. "
    f"The most prevalent signals are {prev_bits}. Acceptance differences by signal indicate {delta_bits}. "
    f"These associations highlight where agentic PRs most often struggle."
)

p_sec_prev   = "n/a" if np.isnan(sec_prev) else f"{sec_prev:.2f}%"
p_sec_acc_no = "n/a" if np.isnan(sec_acc_no) else f"{sec_acc_no:.2f}%"
p_sec_acc_yes= "n/a" if np.isnan(sec_acc_yes) else f"{sec_acc_yes:.2f}%"
para_5c = (
    f"**RQ 5(c) — Security-related signals.** "
    f"Using conservative textual and dependency-bump heuristics, **{p_sec_prev}** of PRs are security-flagged. "
    f"Their acceptance rate is **{p_sec_acc_yes}** vs **{p_sec_acc_no}** for non-flagged PRs."
    f"{ttm_line}"
)

# Paragraph order in the report is 5(b), 5(a), 5(c); each is wrapped at 110 columns.
md_text = h + stamp + "\n\n".join([fill(para_5b, 110), fill(para_5a, 110), fill(para_5c, 110)]) + "\n"
# Explicit UTF-8: the text contains non-ASCII characters (em dashes), and the
# platform default encoding is not guaranteed to handle them.
with open("results/results_5abc.md", "w", encoding="utf-8") as f:
    f.write(md_text)
print(md_text)
print("Saved:", "results/results_5abc.md")

# ---------- optional DOCX with plots (safe no-ops if lib/images missing) ----------
# Any failure in this step (missing python-docx, unreadable image, save error) is
# reported and skipped rather than raised.
try:
    # !pip -q install python-docx
    from docx import Document
    from docx.shared import Inches

    doc = Document()
    doc.add_heading('AIDev — Results for RQ 5(a), 5(b), 5(c)', 0)
    # Same paragraph order as the markdown report.
    for para in (para_5b, para_5a, para_5c):
        doc.add_paragraph(para)

    # attach any figures you already saved
    figs = [
        "results/acceptance_rate_bar.png",
        "results/acceptance_by_language.png",
        "results/acceptance_by_quartile.png",
        "results/5a_prevalence_top10.png",
        "results/5a_acc_delta_top10.png",
        "results/5c_acceptance_bar.png",
    ]
    # Only figures that exist on disk are embedded, in the order listed above.
    for img in filter(os.path.exists, figs):
        doc.add_picture(img, width=Inches(5.8))

    out_docx = "AIDev_5abc_Summary.docx"
    doc.save(out_docx)
    print("Saved:", out_docx)
except Exception as e:
    print("DOCX step skipped (install python-docx if needed).", e)


# Results for AIDev RQ 5(a), 5(b), 5(c)

_Generated 2025-09-26 09:33 UTC_

**RQ 5(b) — Early signals (clarification) and acceptance.** Using the early textual cue **`clarify_strict`**,
we analyze **N=932,791** agentic PRs. Overall acceptance is **84.71%**. PRs without the cue are accepted at
**84.78%**, versus **70.24%** with the cue — a raw difference of **-14.54 pp** (bootstrap 95% CI **[-15.80,
-13.27]**). A regularized logistic model controlling for repository popularity and PR size estimates an
average change of **-20.61 pp** in acceptance probability when toggling the cue (**AUC=0.503**).

**RQ 5(a) — Failure patterns and touched paths.** We instrument failure/path signals from PR text and touched
files. The most prevalent signals are `pat_docs` (20.2%), `pat_build_ci` (18.4%), `pat_lint` (16.4%).
Acceptance differences by signal indicate `pat_docs`: -7.01 pp, `pat_revert_hotfix`: -6.22 pp, `pat_build_ci`:
-4.00 pp. These associations highlight where agentic PRs most often strugg