In [1]:
from google.colab import files
import os

# Ask Colab for the input files; the returned mapping is keyed by file name
# (its keys are printed below as a sanity check).
uploaded = files.upload()

# Adjust these names if your files are named differently in Colab:
HUMAN_XLSX = "valleys_peaks_final_results.xlsx"
MODEL_CSV  = "tie_dialog_6_dialogues_combined.csv"
# Passed as `sheet_name` when HUMAN_XLSX is read (the sheet name itself ends in ".csv").
HUMAN_SHEET = "valleys_peaks_template.csv"

# Every CSV/XLSX/PDF produced by this notebook is written under this directory.
out_dir = "tie_dialog_region_metrics_optionA"
os.makedirs(out_dir, exist_ok=True)

print("OK. Archivos en el entorno:", list(uploaded.keys()))


Saving valleys_peaks_final_results.xlsx to valleys_peaks_final_results.xlsx
Saving tie_dialog_6_dialogues_combined.csv to tie_dialog_6_dialogues_combined.csv
OK. Archivos en el entorno: ['valleys_peaks_final_results.xlsx', 'tie_dialog_6_dialogues_combined.csv']


In [2]:
import pandas as pd, numpy as np, itertools, random
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# === Pre-specified analysis parameters (Option A) ===
W = 2                 # humans: each annotated point t is widened to the region [t-W, t+W]
CONS_K = 3            # human consensus: a turn counts when >=3 of the 5 annotators cover it
B = 2000              # number of random-region draws for the permutation baseline
LAG_MIN, LAG_MAX = -5, 5  # inclusive range of lags scanned when searching for the best lag
annotators = [1,2,3,4,5]  # annotator ids; used to build the "annotator_<id>_<label>" column names

# Repair (peak) is derived from Ct: turns whose Ct is at or above this per-dialogue quantile.
Q_HIGH = 0.85

# Rupture (valley) is derived from dCt: turns whose dCt is at or below this per-dialogue quantile.
Q_DLOW = 0.15


In [3]:
def merge_intervals(intervals):
    """Merge overlapping or directly adjacent inclusive intervals.

    The input is sorted first. An interval is folded into the previous one
    when it starts at or before ``previous_end + 1`` (so ``(0, 2)`` and
    ``(3, 5)`` become ``(0, 5)``).

    Args:
        intervals: sequence of ``(start, end)`` pairs with inclusive ends.

    Returns:
        A new list of merged intervals in ascending order; ``[]`` for empty input.
    """
    if not intervals:
        return []
    ordered = sorted(intervals)
    result = [ordered[0]]
    for start, end in ordered[1:]:
        prev_start, prev_end = result[-1]
        if start > prev_end + 1:
            # A real gap: open a new interval.
            result.append((start, end))
        else:
            # Touching or overlapping: extend the last interval.
            result[-1] = (prev_start, max(prev_end, end))
    return result

def points_to_intervals(points, w, T):
    """Expand point annotations into merged windows of half-width ``w``.

    Each distinct point ``t`` becomes the inclusive window ``[t - w, t + w]``,
    clipped to ``[0, T - 1]``; the windows are then passed to ``merge_intervals``.

    Args:
        points: iterable of 0-based positions (duplicates are ignored).
        w: half-width of the window placed around each point.
        T: sequence length used for the upper clipping bound.

    Returns:
        List of merged ``(start, end)`` intervals.
    """
    windows = [
        (max(0, center - w), min(T - 1, center + w))
        for center in sorted(set(points))
    ]
    return merge_intervals(windows)

def mask_to_intervals(mask):
    """Convert a boolean mask into its runs of consecutive truthy entries.

    Args:
        mask: indexable sequence whose elements are tested for truthiness.

    Returns:
        List of ``(start, end)`` index pairs (inclusive ends), one per run,
        in ascending order.
    """
    runs = []
    run_start = None  # index where the current run began, or None when outside a run
    for pos in range(len(mask)):
        if mask[pos]:
            if run_start is None:
                run_start = pos
        elif run_start is not None:
            runs.append((run_start, pos - 1))
            run_start = None
    if run_start is not None:
        # The mask ended while still inside a run.
        runs.append((run_start, len(mask) - 1))
    return runs

def intervals_to_mask(intervals, T):
    """Build a length-``T`` boolean mask that is True inside the given intervals.

    Args:
        intervals: iterable of ``(start, end)`` pairs with inclusive ends.
        T: length of the mask to create.

    Returns:
        ``numpy.ndarray`` of dtype bool and shape ``(T,)``.
    """
    covered = np.zeros(T, dtype=bool)
    for start, end in intervals:
        # Ends are inclusive, hence the +1 on the slice stop.
        covered[slice(start, end + 1)] = True
    return covered

def iou_masks(A, B):
    """Intersection-over-union of two boolean masks.

    Conventions for empty masks: two empty masks score 1.0; exactly one empty
    mask scores 0.0.

    Args:
        A, B: array-likes exposing ``.sum()`` and accepted by
            ``np.logical_and`` / ``np.logical_or``.

    Returns:
        IoU as a Python float.
    """
    size_a = A.sum()
    size_b = B.sum()
    if size_a == 0 and size_b == 0:
        return 1.0
    if size_a == 0 or size_b == 0:
        return 0.0
    union = np.logical_or(A, B).sum()
    if not union:
        return 0.0
    shared = np.logical_and(A, B).sum()
    return float(shared) / float(union)

def overlap_prf(A, B):
    """Element-wise precision, recall and F1 of mask ``A`` against mask ``B``.

    Precision is ``|A & B| / |A|`` and recall is ``|A & B| / |B|``.
    Two empty masks score ``(1.0, 1.0, 1.0)``; exactly one empty mask scores
    ``(0.0, 0.0, 0.0)``.

    Args:
        A: predicted mask (array-like exposing ``.sum()``).
        B: reference mask (array-like exposing ``.sum()``).

    Returns:
        Tuple ``(precision, recall, f1)`` of Python floats.
    """
    n_pred = A.sum()
    n_ref = B.sum()
    if n_pred == 0 and n_ref == 0:
        return 1.0, 1.0, 1.0
    if n_pred == 0 or n_ref == 0:
        return 0.0, 0.0, 0.0
    # Both sizes are non-zero past the guards, so the divisions are safe.
    hits = np.logical_and(A, B).sum()
    precision = hits / n_pred
    recall = hits / n_ref
    denom = precision + recall
    if denom:
        f1 = 2 * precision * recall / denom
    else:
        f1 = 0.0
    return float(precision), float(recall), float(f1)

def shift_mask(mask, k):
    """Shift a boolean mask by ``k`` positions, filling vacated slots with False.

    Positive ``k`` moves content towards higher indices, negative ``k`` towards
    lower indices. Entries pushed past either end are dropped (no wrap-around).

    Args:
        mask: 1-D numpy array (must support ``.copy()`` and slicing).
        k: signed integer shift.

    Returns:
        A new array of the same length. For ``k == 0`` this is ``mask.copy()``;
        otherwise a bool array.
    """
    T = len(mask)
    if k == 0:
        return mask.copy()
    out = np.zeros(T, dtype=bool)
    # A shift at least as long as the mask pushes everything out. Without this
    # guard the slices below have mismatched lengths and NumPy raises a
    # broadcast ValueError (e.g. T=4, k=6).
    if abs(k) >= T:
        return out
    if k > 0:
        out[k:] = mask[:T-k]
    else:
        out[:T+k] = mask[-k:]
    return out


In [4]:
def random_segments_like(intervals, T, rng):
    """Draw random segments with the same lengths as ``intervals``.

    Each input interval is replaced by a segment of identical length whose
    start is drawn uniformly via ``rng.randrange`` so that it fits in
    ``[0, T - 1]``. An interval at least as long as ``T`` becomes ``(0, T - 1)``
    without consuming a random draw. The segments are finally passed through
    ``merge_intervals``, so overlapping draws are fused.

    Args:
        intervals: iterable of inclusive ``(start, end)`` pairs.
        T: sequence length.
        rng: object exposing ``randrange(start, stop)`` (e.g. ``random.Random``).

    Returns:
        List of merged random ``(start, end)`` intervals.
    """
    placed = []
    for seg_start, seg_end in intervals:
        span = seg_end - seg_start + 1
        if span >= T:
            placed.append((0, T - 1))
        else:
            offset = rng.randrange(0, T - span + 1)
            placed.append((offset, offset + span - 1))
    return merge_intervals(placed)

def perm_test(model_intervals, human_mask, T, B=2000, seed=0):
    """Compare the model's overlap with the human mask against random regions.

    The observed IoU/F1 of the model intervals versus ``human_mask`` is compared
    with ``B`` random placements of segments of the same lengths
    (``random_segments_like``). One-sided p-values use the add-one estimator
    ``(#{null >= observed} + 1) / (B + 1)``.

    Args:
        model_intervals: list of inclusive ``(start, end)`` model regions.
        human_mask: boolean reference mask of length ``T``.
        T: sequence length.
        B: number of random draws.
        seed: seed for the private ``random.Random`` generator.

    Returns:
        Dict with the observed scores (``obs_iou``, ``obs_f1``), baseline
        mean/sd (sample sd, ``ddof=1``), p-values (``p_iou``, ``p_f1``) and the
        raw null samples (``ious``, ``f1s``) as numpy arrays.
    """
    rng = random.Random(int(seed))

    observed_mask = intervals_to_mask(model_intervals, T)
    obs_iou = iou_masks(observed_mask, human_mask)
    obs_f1 = overlap_prf(observed_mask, human_mask)[2]

    null_iou = []
    null_f1 = []
    for _ in range(B):
        sampled_mask = intervals_to_mask(
            random_segments_like(model_intervals, T, rng), T
        )
        null_iou.append(iou_masks(sampled_mask, human_mask))
        null_f1.append(overlap_prf(sampled_mask, human_mask)[2])

    ious = np.array(null_iou)
    f1s = np.array(null_f1)

    # Add-one correction keeps the p-value strictly positive.
    p_iou = (np.sum(ious >= obs_iou) + 1) / (B + 1)
    p_f1 = (np.sum(f1s >= obs_f1) + 1) / (B + 1)

    result = {}
    result["obs_iou"] = float(obs_iou)
    result["obs_f1"] = float(obs_f1)
    result["base_iou_mean"] = float(ious.mean())
    result["base_iou_sd"] = float(ious.std(ddof=1))
    result["base_f1_mean"] = float(f1s.mean())
    result["base_f1_sd"] = float(f1s.std(ddof=1))
    result["p_iou"] = float(p_iou)
    result["p_f1"] = float(p_f1)
    result["ious"] = ious
    result["f1s"] = f1s
    return result


In [5]:
# Load the human annotations (Excel sheet) and the model signal (CSV).
human_all = pd.read_excel(HUMAN_XLSX, sheet_name=HUMAN_SHEET)
model_all = pd.read_csv(MODEL_CSV)

# The coherence column may be spelled "Ct" or "ct"; take the first one present.
coh_col = next((cand for cand in ("Ct", "ct") if cand in model_all.columns), None)
assert coh_col is not None, "No encuentro columna de coherencia (Ct o ct)."

print("OK. Signal:", coh_col, "| Human:", human_all.shape, "| Model:", model_all.shape)


OK. Signal: Ct | Human: (180, 14) | Model: (139, 42)


In [6]:
# Per-dialogue, per-label result rows; turned into DataFrames after the loop.
rows_model_vs_h = []
rows_iaa = []
rows_lag = []
rows_baseline = []

pdf_path = os.path.join(out_dir, "region_metrics_report_optionA.pdf")

# Lags are scanned from the smallest |k| outwards (negative before positive at
# equal |k|). Together with the strict ">" comparison below, ties in IoU are
# resolved in favour of the smallest shift, so a dialogue whose IoU is identical
# at every lag reports lag 0 rather than LAG_MIN.
lag_candidates = sorted(range(LAG_MIN, LAG_MAX + 1), key=lambda k: (abs(k), k))

# The context manager finalizes the PDF even if an iteration raises.
with PdfPages(pdf_path) as pdf:
    for did in sorted(human_all["dialogue_id"].unique()):
        did_int = int(did)
        h = human_all[human_all["dialogue_id"]==did].sort_values("turn")
        m = model_all[model_all["dialogue_id"]==did].sort_values("turn")

        # The largest turn number seen in either table is used as the dialogue length.
        T = int(max(h["turn"].max(), m["turn"].max()))

        # NOTE(review): model rows are aligned to turns by position after sorting,
        # i.e. this assumes the model has one row per turn starting at turn 1 with
        # no gaps; missing trailing turns are padded with NaN. Confirm against the CSV.
        Ct = m[coh_col].to_numpy(dtype=float)
        if len(Ct) < T:
            Ct = np.pad(Ct, (0, T-len(Ct)), constant_values=np.nan)

        # Peak threshold: Q_HIGH quantile of the finite Ct values of this dialogue.
        Ct_f = Ct[np.isfinite(Ct)]
        thr_high = float(np.quantile(Ct_f, Q_HIGH)) if len(Ct_f) else np.nan

        # dCt[t] = Ct[t] - Ct[t-1] (t>=1). dCt[0]=nan
        dCt = np.full(T, np.nan, dtype=float)
        for t in range(1, T):
            if np.isfinite(Ct[t]) and np.isfinite(Ct[t-1]):
                dCt[t] = Ct[t] - Ct[t-1]
        # Valley threshold: Q_DLOW quantile of the finite dCt values of this dialogue.
        dCt_f = dCt[np.isfinite(dCt)]
        thr_dlow = float(np.quantile(dCt_f, Q_DLOW)) if len(dCt_f) else np.nan

        for label in ["valley","peak"]:
            # Humans: per-annotator region masks
            ann_masks = {}
            for a in annotators:
                col = f"annotator_{a}_{label}"
                pts = h.loc[h[col]==1, "turn"].astype(int).tolist()
                pts0 = [t-1 for t in pts]  # 0-index
                intervals = points_to_intervals(pts0, W, T)
                ann_masks[a] = intervals_to_mask(intervals, T)

            # IAA (annotator pairs)
            pair_ious=[]
            pair_f1=[]
            for a,b_ in itertools.combinations(annotators, 2):
                pair_ious.append(iou_masks(ann_masks[a], ann_masks[b_]))
                _,_, f1 = overlap_prf(ann_masks[a], ann_masks[b_])
                pair_f1.append(f1)

            rows_iaa.append({
                "dialogue_id": did_int, "label": label, "W": W,
                "IAA_IoU_mean": float(np.mean(pair_ious)),
                "IAA_IoU_sd": float(np.std(pair_ious, ddof=1)) if len(pair_ious)>1 else 0.0,
                "IAA_F1_mean": float(np.mean(pair_f1)),
                "IAA_F1_sd": float(np.std(pair_f1, ddof=1)) if len(pair_f1)>1 else 0.0,
                "n_pairs": len(pair_ious)
            })

            # Human consensus: turns covered by at least CONS_K annotators
            stack = np.stack([ann_masks[a] for a in annotators], axis=0)
            consensus = (stack.sum(axis=0) >= CONS_K)
            consensus_intervals = mask_to_intervals(consensus)

            # Model (fixed Option A rule)
            if label == "peak":
                model_mask = np.isfinite(Ct) & (Ct >= thr_high)
            else:
                model_mask = np.isfinite(dCt) & (dCt <= thr_dlow)

            model_intervals = mask_to_intervals(model_mask)

            # Metrics: model vs consensus
            iou = iou_masks(model_mask, consensus)
            prec, rec, f1 = overlap_prf(model_mask, consensus)

            rows_model_vs_h.append({
                "dialogue_id": did_int, "label": label,
                "W": W, "CONS_K": CONS_K, "Q_HIGH": Q_HIGH, "Q_DLOW": Q_DLOW,
                "thr_high": thr_high, "thr_dlow": thr_dlow,
                "model_covered_turns": int(model_mask.sum()),
                "human_consensus_covered_turns": int(consensus.sum()),
                "IoU": float(iou), "Precision": float(prec), "Recall": float(rec), "F1": float(f1),
                "model_segments": len(model_intervals),
                "human_segments": len(consensus_intervals),
            })

            # Baseline random regions (seed is unique per dialogue/label pair)
            base = perm_test(model_intervals, consensus, T, B=B, seed=2026 + did_int*10 + (0 if label=="valley" else 1))
            rows_baseline.append({
                "dialogue_id": did_int, "label": label,
                "W": W, "CONS_K": CONS_K, "Q_HIGH": Q_HIGH, "Q_DLOW": Q_DLOW,
                "obs_IoU": base["obs_iou"], "obs_F1": base["obs_f1"],
                "base_IoU_mean": base["base_iou_mean"], "base_IoU_sd": base["base_iou_sd"],
                "base_F1_mean": base["base_f1_mean"], "base_F1_sd": base["base_f1_sd"],
                "p_IoU": base["p_iou"], "p_F1": base["p_f1"],
                "B": B,
                "model_segments": len(model_intervals),
                "model_covered_turns": int(model_mask.sum())
            })

            # Best lag (precursor): shift the model mask and keep the lag with the
            # highest IoU; ties go to the smallest |k| (see lag_candidates).
            best = {"k": None, "iou": -1.0, "f1": -1.0, "prec": None, "rec": None}
            for k in lag_candidates:
                shifted = shift_mask(model_mask, k)
                iou_k = iou_masks(shifted, consensus)
                p_k, r_k, f1_k = overlap_prf(shifted, consensus)
                if iou_k > best["iou"]:
                    best = {"k": k, "iou": float(iou_k), "f1": float(f1_k), "prec": float(p_k), "rec": float(r_k)}

            rows_lag.append({
                "dialogue_id": did_int, "label": label,
                "W": W, "CONS_K": CONS_K, "Q_HIGH": Q_HIGH, "Q_DLOW": Q_DLOW,
                "best_lag_k": int(best["k"]),
                "IoU_at_best_lag": best["iou"],
                "F1_at_best_lag": best["f1"],
                "Precision_at_best_lag": best["prec"],
                "Recall_at_best_lag": best["rec"],
            })

            # PDF: signal + model regions
            if label == "peak":
                series = Ct
                ylabel = "Ct"
                thr = thr_high
                title = f"Diálogo {did_int} — PEAK (repair) = Ct >= Q{int(Q_HIGH*100)}"
            else:
                series = dCt
                ylabel = "dCt"
                thr = thr_dlow
                title = f"Diálogo {did_int} — VALLEY (rupture) = dCt <= Q{int(Q_DLOW*100)}"

            fig = plt.figure()
            plt.plot(np.arange(1, T+1), series, label=ylabel)
            if np.isfinite(thr):
                plt.axhline(thr, linestyle="--", label="threshold")
            for seg_start, seg_end in model_intervals:
                # Turns are plotted at x = index + 1. Shading half a turn on each
                # side keeps single-turn regions visible instead of zero-width.
                plt.axvspan(seg_start + 0.5, seg_end + 1.5, alpha=0.2)
            plt.title(title)
            plt.xlabel("Turno")
            plt.ylabel(ylabel)
            plt.legend()
            pdf.savefig(fig, bbox_inches="tight")
            plt.close(fig)

            # PDF: histogram of baseline IoUs with the observed IoU marked
            fig = plt.figure()
            plt.hist(base["ious"], bins=30)
            plt.axvline(base["obs_iou"])
            plt.title(f"Baseline IoU — Diálogo {did_int} — {label} (B={B})")
            plt.xlabel("IoU (regiones aleatorias)")
            plt.ylabel("conteo")
            pdf.savefig(fig, bbox_inches="tight")
            plt.close(fig)

df_model_vs_h = pd.DataFrame(rows_model_vs_h)
df_iaa = pd.DataFrame(rows_iaa)
df_base = pd.DataFrame(rows_baseline)
df_lag = pd.DataFrame(rows_lag)

print("Hecho:", df_model_vs_h.shape, df_iaa.shape, df_base.shape, df_lag.shape)


Hecho: (12, 16) (12, 8) (12, 17) (12, 11)


In [7]:
def agg_summary(df, cols, group="label"):
    """Per-group mean and sample standard deviation of selected columns.

    Args:
        df: input DataFrame.
        cols: column names to summarise.
        group: column to group by.

    Returns:
        DataFrame with one row per group. The group key is stored under
        ``"label"`` (regardless of ``group``), followed by ``<col>_mean`` and
        ``<col>_sd`` for each column. The sd uses ``ddof=1`` and is reported as
        0.0 for single-row groups.
    """
    records = []
    for key, chunk in df.groupby(group):
        has_spread = len(chunk) > 1
        record = {"label": key}
        for name in cols:
            values = chunk[name]
            record[name + "_mean"] = float(values.mean())
            record[name + "_sd"] = float(values.std(ddof=1)) if has_spread else 0.0
        records.append(record)
    return pd.DataFrame(records)

# Per-label summaries (mean / sd across dialogues) of each result table.
sum_model = agg_summary(df_model_vs_h, ["IoU","F1","Precision","Recall"])
sum_iaa   = agg_summary(df_iaa, ["IAA_IoU_mean","IAA_F1_mean"])
sum_base  = agg_summary(df_base, ["obs_IoU","obs_F1","base_IoU_mean","base_F1_mean","p_IoU","p_F1"])
sum_lag   = agg_summary(df_lag, ["best_lag_k","IoU_at_best_lag","F1_at_best_lag"])

# CSVs: one file per per-dialogue table
csv_exports = [
    (df_model_vs_h, "per_dialogue_model_vs_consensus_optionA.csv"),
    (df_iaa, "per_dialogue_IAA_optionA.csv"),
    (df_base, "per_dialogue_baseline_optionA.csv"),
    (df_lag, "per_dialogue_best_lag_optionA.csv"),
]
for frame, csv_name in csv_exports:
    frame.to_csv(os.path.join(out_dir, csv_name), index=False)

# Excel master workbook: per-dialogue sheets first, then the summaries
xlsx_out = os.path.join(out_dir, "ALL_RESULTS_optionA.xlsx")
sheet_exports = [
    (df_model_vs_h, "model_vs_consensus"),
    (df_iaa, "human_human_IAA"),
    (df_base, "baseline_random_regions"),
    (df_lag, "best_lag"),
    (sum_model, "SUMMARY_model"),
    (sum_iaa, "SUMMARY_IAA"),
    (sum_base, "SUMMARY_baseline"),
    (sum_lag, "SUMMARY_lag"),
]
with pd.ExcelWriter(xlsx_out, engine="openpyxl") as w:
    for frame, sheet in sheet_exports:
        frame.to_excel(w, sheet_name=sheet, index=False)

print("Listo:", xlsx_out)
display(sum_model)
display(sum_base)
display(sum_lag)

# Download the main output files
from google.colab import files
files.download(xlsx_out)
files.download(pdf_path)


Listo: tie_dialog_region_metrics_optionA/ALL_RESULTS_optionA.xlsx


Unnamed: 0,label,IoU_mean,IoU_sd,F1_mean,F1_sd,Precision_mean,Precision_sd,Recall_mean,Recall_sd
0,peak,0.189947,0.075928,0.313447,0.109244,0.819444,0.213546,0.196495,0.076195
1,valley,0.121098,0.072476,0.209562,0.120377,0.527778,0.323465,0.131944,0.076924


Unnamed: 0,label,obs_IoU_mean,obs_IoU_sd,obs_F1_mean,obs_F1_sd,base_IoU_mean_mean,base_IoU_mean_sd,base_F1_mean_mean,base_F1_mean_sd,p_IoU_mean,p_IoU_sd,p_F1_mean,p_F1_sd
0,peak,0.189947,0.075928,0.313447,0.109244,0.1137,0.014175,0.195676,0.022019,0.376895,0.273078,0.376895,0.273078
1,valley,0.121098,0.072476,0.209562,0.120377,0.107512,0.017277,0.187887,0.030731,0.586623,0.330573,0.586623,0.330573


Unnamed: 0,label,best_lag_k_mean,best_lag_k_sd,IoU_at_best_lag_mean,IoU_at_best_lag_sd,F1_at_best_lag_mean,F1_at_best_lag_sd
0,peak,-3.5,1.224745,0.220055,0.041968,0.359099,0.056902
1,valley,-4.333333,0.516398,0.231672,0.064148,0.372629,0.082126


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>