In [None]:
import pandas as pd
import numpy as np

# Read the classified-functions parquet file into `df`; the trailing
# expression shows the first rows as the cell output.
df=pd.read_parquet("./final_data/pyfunctions_ai_classified.parquet")
df.head()

In [None]:
from verbosity_fp_analysis import analyze_verbosity_core

# Input contract noted by the author -- df must have: modified_blocks (str),
# user_experience (float), true_label ('human'/'ai'), prediction (float; P(ai)).
res = analyze_verbosity_core(
    df,
    ai_threshold=0.5,
    prediction_is_ai_prob=True,
    individual_features=['avg_line_len','blank_ratio','comment_ratio','docstring_len','n_tokens']
)

# 1) Correlations table (humans), ordered by variable name.
corr_tbl = res["corr_table"].copy().sort_values("variable").reset_index(drop=True)
# Rounded copy for printing; corr_tbl itself keeps full precision.
# (Bare `corr_tbl` / `corr_tbl_fmt` expressions were removed here: only the last
# expression of a cell is displayed, so mid-cell they had no effect.)
corr_tbl_fmt = corr_tbl.assign(
    spearman_rho=corr_tbl["spearman_rho"].round(3)
)

# 2) Decile tables (humans) -- copies, so the tables inside `res` are not modified.
deciles_vc_tbl = res["deciles_VC"].copy()
deciles_vs_tbl = res["deciles_VS"].copy()
# Add a percentage column (one decimal) to both copies.
for t in (deciles_vc_tbl, deciles_vs_tbl):
    t["fp_rate_pct"] = (t["fp_rate"]*100).round(1)

# 3) Logistic regression tables
models = res["models"]

# 3a) Coefficients per model (odds ratios included)
coef_base = models["baseline"]["coeffs"].copy()
coef_vs   = models["plus_VS"]["coeffs"].copy()
coef_full = models["full"]["coeffs"].copy()

# add 95% CI for ORs
def add_or_ci(df, z=1.96):
    """Return a coefficient table with odds ratios and their confidence bounds.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "term", "coef" and "se". If an "odds_ratio"
        column is present it is used as the point estimate; otherwise the
        odds ratio is computed as exp(coef).
    z : float, default 1.96
        Multiplier applied to the standard error when building the bounds
        exp(coef - z*se) and exp(coef + z*se).

    Returns
    -------
    pandas.DataFrame
        A new frame with columns term, coef, se, OR, OR_lo, OR_hi, where the
        three OR columns are rounded to 3 decimals. The input is not modified.
    """
    # Fall back to exp(coef) so tables without a precomputed odds ratio still work.
    if "odds_ratio" in df.columns:
        point = df["odds_ratio"]
    else:
        point = np.exp(df["coef"])

    lo = np.exp(df["coef"] - z * df["se"])
    hi = np.exp(df["coef"] + z * df["se"])

    out = df.copy()
    out["OR"] = point.round(3)
    out["OR_lo"] = lo.round(3)
    out["OR_hi"] = hi.round(3)
    return out[["term", "coef", "se", "OR", "OR_lo", "OR_hi"]]

# Coefficient tables with odds ratios and their confidence bounds, one per model.
coef_base_tbl = add_or_ci(coef_base)
coef_vs_tbl   = add_or_ci(coef_vs)
coef_full_tbl = add_or_ci(coef_full)

# 3b) Model summary table (AIC/AUC), rounded for reporting.
summary_tbl = pd.DataFrame([
    {"model":"baseline", "AIC": models["baseline"]["AIC"], "AUC": models["baseline"]["AUC"]},
    {"model":"+VS",      "AIC": models["plus_VS"]["AIC"],  "AUC": models["plus_VS"]["AUC"]},
    {"model":"+VS+templated+inter", "AIC": models["full"]["AIC"], "AUC": models["full"]["AUC"]},
]).assign(AIC=lambda d: d["AIC"].round(2), AUC=lambda d: d["AUC"].round(3))

# Plain-text dump of every table built in this cell.
print(corr_tbl_fmt)

print(deciles_vc_tbl)

print(deciles_vs_tbl)

# A stray trailing comma after the next print used to build a throwaway tuple.
print(coef_base_tbl)
print(coef_vs_tbl)
print(coef_full_tbl)
print(summary_tbl)

In [None]:
# ONE SNIPPET
# (A) Facet plot of FP deciles for: Composite Verbosity (VS), Composite Verbosity + Size (VC), and Templatedness
# (B) Combined regression LaTeX table using statsmodels + stargazer (multiple models in one table)
#
# Prereqs (once):
#   pip install matplotlib pandas numpy statsmodels stargazer
#
# Assumes you've already run:
#   from verbosity_fp_analysis import analyze_verbosity_core
#   res = analyze_verbosity_core(df, ai_threshold=0.5, prediction_is_ai_prob=True)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from stargazer.stargazer import Stargazer

# ===== Pull objects =====
# Copies are taken so later column additions do not touch the frames held in `res`.
features = res["features"].copy()
dec_vc = res["deciles_VC"].copy()
dec_vs = res["deciles_VS"].copy()

# ===== (A) FACET PLOT: FP rate by decile (VS, VC, Templatedness) =====
# Keep only rows whose true label is "human"; FP is 1 when such a row was predicted "ai".
humans = features.loc[features["true_label"].eq("human")].copy()
humans["FP"] = humans["predicted_label"].eq("ai").astype(int)

def fp_deciles(humans_df, series):
    """Summarise the "FP" column of `humans_df` per quantile bin of `series`.

    `series` is cut into up to 10 quantile bins (integer codes; duplicate bin
    edges are dropped, so fewer bins are possible). Returns a frame with the
    columns decile, fp_rate (mean of FP), count, and fp_rate_pct (fp_rate * 100),
    sorted by decile with a fresh 0..k-1 index.
    """
    bins = pd.qcut(series, q=10, labels=False, duplicates="drop")
    per_bin = humans_df.groupby(bins)["FP"].agg(fp_rate="mean", count="count")
    # Name the group index so it comes back as the "decile" column.
    table = per_bin.rename_axis("decile").reset_index()
    table["fp_rate_pct"] = table["fp_rate"] * 100.0
    table = table.sort_values("decile")
    return table.reset_index(drop=True)

# Both precomputed decile tables need a percentage column for plotting.
for t in (dec_vc, dec_vs):
    if "fp_rate_pct" not in t.columns:
        t["fp_rate_pct"] = t["fp_rate"] * 100.0

# Templatedness deciles are computed here from the human subset.
dec_tmp = fp_deciles(humans, humans["templatedness"])

fig, axes = plt.subplots(1, 3, figsize=(12, 3.8), sharey=True)

# One panel per measure, drawn left to right: VS, VC, Templatedness.
panels = [
    (dec_vs, "Composite Verbosity (VS)"),
    (dec_vc, "Composite Verbosity + Size (VC)"),
    (dec_tmp, "Templatedness"),
]
for ax, (panel_tbl, panel_title) in zip(axes, panels):
    ax.plot(panel_tbl["decile"], panel_tbl["fp_rate_pct"], marker="o")
    ax.axhline(0, linewidth=1)
    ax.set_ylim(0, 18)
    ax.set_title(panel_title)
    ax.set_xlabel("Decile")
    # Tick positions 0..9 carry the labels 1..10.
    ax.set_xticks(range(10))
    ax.set_xticklabels([str(i) for i in range(1, 11)])

# Only the leftmost panel gets a y-axis label.
axes[0].set_ylabel("False positive rate (%)")

fig.tight_layout()
# fig.savefig("fp_rate_deciles_facets.png", dpi=300)
plt.show()

# ===== (B) Combined regression LaTeX table with statsmodels + stargazer =====
def z(s):
    """Standardize `s`: cast to float, subtract the mean, divide by the population std.

    A tiny constant (1e-12) is added to the standard deviation (ddof=0) so a
    constant input yields zeros instead of a division by zero.
    """
    values = s.astype(float)
    centered = values - values.mean()
    spread = values.std(ddof=0) + 1e-12
    return centered / spread

# Standardized predictors, added in this order: new column -> source column.
z_sources = {
    "z_tokens": "n_tokens",
    "z_complexity": "complexity",
    "z_experience": "user_experience",
    "z_VS": "verbosity_style_index",
    "z_templated": "templatedness",
}
for z_name, raw_name in z_sources.items():
    humans[z_name] = z(humans[raw_name])

# Interaction term: product of the two standardized columns.
humans["z_interaction"] = humans["z_VS"] * humans["z_templated"]

def fit_logit(y, Xcols, data):
    """Fit a statsmodels Logit of column `y` on columns `Xcols` of `data`.

    A constant column is always appended to the design matrix
    (has_constant="add"), the response is cast to float, and the fit runs
    with disp=0. Returns the object produced by `.fit`.
    """
    design = sm.add_constant(data[Xcols].copy(), has_constant="add")
    response = data[y].astype(float)
    return sm.Logit(response, design).fit(disp=0)

# Three nested logit models of FP on the human subset:
# baseline controls, + composite verbosity, + templatedness and the interaction.
m1 = fit_logit("FP", ["z_tokens","z_complexity","z_experience"], humans)
m2 = fit_logit("FP", ["z_tokens","z_complexity","z_experience","z_VS"], humans)
m3 = fit_logit("FP", ["z_tokens","z_complexity","z_experience","z_VS","z_templated","z_interaction"], humans)

# Side-by-side regression table, one column per model.
sg = Stargazer([m1, m2, m3])
sg.title("Logit: False positive (human code)")
sg.custom_columns(["Baseline", "+ Composite Verbosity", "+ Composite Verbosity, Templatedness, Interaction"], [1,1,1])
sg.covariate_order([
    "const","z_tokens","z_complexity","z_experience","z_VS","z_templated","z_interaction"
])
sg.rename_covariates({
    "const": "Intercept",
    "z_tokens": "Tokens (z)",
    "z_complexity": "Complexity (z)",
    "z_experience": "Experience (z)",
    "z_VS": "Composite Verbosity (z)",
    "z_templated": "Templatedness (z)",
    # The label previously held the mojibake "Ã—" (a mis-decoded multiplication sign).
    "z_interaction": "Composite Verbosity × Templatedness"
})

# Write the LaTeX rendering to disk as UTF-8.
latex_table = sg.render_latex()
with open("reg_stargazer.tex", "w", encoding="utf-8") as f:
    f.write(latex_table)

# Last expression of the cell: shows the table as the cell output.
sg

In [None]:
import numpy as np

def add_binomial_ci(df, p_col="fp_rate", n_col="count", z=1.96, method="wald"):
    """Attach binomial confidence bounds for a proportion column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `p_col` (proportions in [0, 1]), `n_col` (group sizes)
        and a "decile" column, which is used for the final sort.
    p_col, n_col : str
        Names of the proportion and count columns.
    z : float, default 1.96
        Critical value multiplying the standard error.
    method : {"wald", "wilson"}, default "wald"
        "wald" is the normal-approximation interval p +/- z*sqrt(p(1-p)/n);
        it has zero width when p is exactly 0 or 1. "wilson" is the Wilson
        score interval, which keeps a non-zero width in those cases.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` with fp_lo, fp_hi (clipped to [0, 1]) and fp_rate_pct,
        fp_lo_pct, fp_hi_pct (the same values times 100), sorted by "decile"
        with a reset index. The input frame is not modified.

    Raises
    ------
    ValueError
        If `method` is not "wald" or "wilson".
    """
    if method not in ("wald", "wilson"):
        raise ValueError(f"method must be 'wald' or 'wilson', got {method!r}")

    p = df[p_col].astype(float).values
    # Counts below 1 are treated as 1 to avoid dividing by zero.
    n = np.maximum(df[n_col].astype(float).values, 1.0)
    var = np.clip(p * (1 - p) / n, 0, None)

    if method == "wald":
        se = np.sqrt(var)
        lo = p - z * se
        hi = p + z * se
    else:
        denom = 1.0 + z ** 2 / n
        center = (p + z ** 2 / (2.0 * n)) / denom
        half = z * np.sqrt(var + z ** 2 / (4.0 * n ** 2)) / denom
        lo = center - half
        hi = center + half

    lo = np.clip(lo, 0, 1)
    hi = np.clip(hi, 0, 1)

    out = df.copy()
    out["fp_lo"] = lo
    out["fp_hi"] = hi
    out["fp_rate_pct"] = p * 100.0
    out["fp_lo_pct"] = lo * 100.0
    out["fp_hi_pct"] = hi * 100.0
    out = out.sort_values("decile").reset_index(drop=True)
    return out

# Attach confidence bounds to each decile table. All three inputs carry the
# decile, fp_rate and count columns that add_binomial_ci reads (the
# templatedness table was recomputed earlier and already includes count).
dec_vs_ci, dec_vc_ci, dec_tmp_ci = (
    add_binomial_ci(decile_tbl) for decile_tbl in (dec_vs, dec_vc, dec_tmp)
)


In [None]:
# import os
# import pandas as pd
# import matplotlib.pyplot as plt
# import numpy as np



# custom_style = {
#     # Font sizes
#     "axes.labelsize": 25,
#     "axes.titlesize": 20,
#     "xtick.labelsize": 20,
#     "ytick.labelsize": 20,

#     # Line and marker styles
#     "lines.linewidth": 3,
#     "lines.markersize": 8,
#     "lines.color": "black",
#     "errorbar.capsize": 5,

#     # Axes & spines
#     "axes.edgecolor": "black",
#     "axes.linewidth": 2,

#     # Tick styling
#     "xtick.color": "black",
#     "ytick.color": "black",
#     "xtick.major.width": 1.2,
#     "ytick.major.width": 1.2,

#     # Grid
#     "axes.grid": True,
#     "grid.color": "gray",
#     "grid.linewidth": 0.7,
#     "grid.linestyle": "--",
#     "grid.alpha": 0.6,

#     # Figure settings
#     "figure.figsize": (12, 8),
#     "figure.dpi": 300,
#     "figure.facecolor": "white"
# }


# plt.rcParams.update(custom_style)


In [None]:
# One row of three panels sharing the y axis.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(12, 3.8), sharey=True)

def draw(ax, d, title):
    """Plot one decile panel with asymmetric error bars on `ax`.

    `d` supplies the columns decile, fp_rate_pct, fp_lo_pct and fp_hi_pct.
    The error bars span from fp_lo_pct to fp_hi_pct around fp_rate_pct.
    The y range is fixed to 0..30 and the x ticks at 0..9 are labelled 1..10.
    """
    deciles = d["decile"].values
    rate = d["fp_rate_pct"].values
    # Distances below and above the point estimate, stacked as a 2 x N array.
    below = rate - d["fp_lo_pct"].values
    above = d["fp_hi_pct"].values - rate
    ax.errorbar(deciles, rate, yerr=np.vstack([below, above]), fmt='-o', capsize=3)
    ax.axhline(0, linewidth=1)
    ax.set_ylim(0, 30)

    ax.set_title(title)
    ax.set_xlabel("Decile")
    ax.set_ylabel("False positive rate (%)")

    tick_positions = range(10)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels([str(pos + 1) for pos in tick_positions])

# Left to right: VS, VC, Templatedness -- each table goes to its own axis.
ci_tables = (dec_vs_ci, dec_vc_ci, dec_tmp_ci)
ci_titles = (
    "Composite Verbosity (VS)",
    "Composite Verbosity + Size (VC)",
    "Templatedness",
)
for panel_ax, ci_tbl, ci_title in zip(axes, ci_tables, ci_titles):
    draw(panel_ax, ci_tbl, ci_title)

fig.tight_layout()
# Optional export (left disabled): plt.savefig("verbosity.pdf", dpi=300)
plt.show()
