In [2]:
# Bloque 0: imports y setup
import numpy as np
import pandas as pd
from tqdm import tqdm
import defs
import defs2
import funciones_aux
from funciones_aux import eval_one_sim, bowley_skew, excess_kurtosis, run_resumenes, run_tests, victorias_grouped, columnas_comparacion_default
import matplotlib.pyplot as plt
import seaborn as sns


# Seed for the NumPy random generator created on the next line.
RANDOM_SEED = 123
# Generator object that the simulation loop hands to eval_one_sim.
rng = np.random.default_rng(seed=RANDOM_SEED)

In [3]:
# Block 3: main simulation loop (1000 simulations)

S = 1000  # number of simulations; the loop below runs indices 1..S

# Re-create the generator from RANDOM_SEED inside this cell so the cell is
# idempotent: re-running it (for example after interrupting a long run)
# starts from the same random state instead of continuing from wherever the
# shared generator was left. On a fresh "Restart & Run All" this yields the
# same state as the generator built in the setup cell, provided no cell in
# between draws from `rng`.
rng = np.random.default_rng(RANDOM_SEED)

rows = []
for s in tqdm(range(1, S+1)):
    # eval_one_sim receives the 1-based simulation index and the generator;
    # whatever it returns becomes one entry of `rows`.
    rows.append(eval_one_sim(s, rng))

# Build the results frame once from the collected entries
# (no row-by-row DataFrame growth).
df_ab = pd.DataFrame(rows)

print("✅ Simulaciones completadas:", len(df_ab))
print("Columnas:", list(df_ab.columns))
df_ab.head()


  1%|          | 6/1000 [00:07<20:28,  1.24s/it]



KeyboardInterrupt: 

In [None]:
# Block 4: win summary
# `comparisons` is reused by the subgroup and statistical-test cells below.
comparisons = columnas_comparacion_default()

# Global win summary (proportions and mean differences, per the author's
# note; the actual computation lives in run_resumenes).
df_resumenes = run_resumenes(
    df_ab,
    comparisons,
    threshold=0.0,
)
df_resumenes

In [None]:
# Win proportions by subgroup
# `comparisons` is iterated as pairs; only the first element of each pair
# (the difference column) is kept.
cols_diffs = [diff_col for diff_col, _ in comparisons]

# Same call for each grouping column, in the original order.
victorias_familia, victorias_outliers, victorias_rango_n = [
    victorias_grouped(df_ab, group_col, cols_diffs, threshold=0.0)
    for group_col in ("familia", "tuvo_outliers", "rango_n")
]

# All three tables are computed above before anything is shown; each one is
# then printed under its own heading.
for heading, tabla in (
    ("--- Proporción de victorias por familia ---", victorias_familia),
    ("\n--- Proporción de victorias por outliers ---", victorias_outliers),
    ("\n--- Proporción de victorias por rango_n ---", victorias_rango_n),
):
    print(heading)
    display(tabla)


In [None]:
# Statistical tests (one-sided Wilcoxon, per the author's note; the test
# itself is implemented in run_tests).
df_tests = run_tests(
    df_ab,
    comparisons,
    alternative="greater",
)
df_tests


In [None]:
# (Optional) Persist the tables to CSV for the experiment appendix.
# The export is guarded by a flag instead of being commented out, so the code
# stays syntax-checked and can be enabled without editing several lines.
# With the default value (False) this cell writes nothing.
EXPORT_CSV = False

if EXPORT_CSV:
    df_resumenes.to_csv("ab_resumenes.csv", index=False)
    victorias_familia.to_csv("ab_victorias_por_familia.csv", index=False)
    victorias_outliers.to_csv("ab_victorias_por_outliers.csv", index=False)
    victorias_rango_n.to_csv("ab_victorias_por_rangon.csv", index=False)
    df_tests.to_csv("ab_tests_wilcoxon.csv", index=False)
    print("CSV exportados ✅")


### Visualizaciones

In [None]:

# Plot styling
sns.set(style="whitegrid", context="talk")

# --- 1. Scatter: delta_mean against delta_mp1 and delta_mp2 ---
fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharex=True, sharey=True)

# One entry per panel: y column, title, y-axis label, extra scatterplot kwargs.
# Only the left panel turns the legend off; the right one keeps seaborn's default.
panels = [
    ("delta_mp1", "V1 vs Media (delta)", "Δ Métrica V1", {"legend": False}),
    ("delta_mp2", "V2 vs Media (delta)", "Δ Métrica V2", {}),
]

for ax, (y_col, title, y_label, extra_kwargs) in zip(axes, panels):
    sns.scatterplot(
        data=df_ab, x="delta_mean", y=y_col,
        hue="familia", alpha=0.6, ax=ax, **extra_kwargs
    )
    # Dashed red y = x reference line from the origin up to the largest
    # value found in either of the two plotted columns.
    upper = df_ab[["delta_mean", y_col]].max().max()
    ax.plot([0, upper], [0, upper], 'r--')
    ax.set_title(title)
    ax.set_xlabel("Δ Media")
    ax.set_ylabel(y_label)

plt.tight_layout()
plt.show()


In [None]:
# --- 2. Histograms of the direct improvements ---
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# One entry per panel: difference column, bar colour, panel title.
hist_specs = (
    ("d_mp1_vs_mean", "skyblue", "V1 – Media (Δ estabilidad)"),
    ("d_mp2_vs_mean", "lightgreen", "V2 – Media (Δ estabilidad)"),
    ("d_v2_vs_v1", "orange", "V2 – V1 (Δ estabilidad)"),
)

for ax, (diff_col, fill_color, panel_title) in zip(axes, hist_specs):
    sns.histplot(df_ab[diff_col], bins=40, kde=True, ax=ax, color=fill_color)
    # Vertical reference at zero: values to its right are positive differences.
    ax.axvline(0, color="red", linestyle="--")
    ax.set_title(panel_title)

plt.tight_layout()
plt.show()


In [None]:
# --- 3. Win proportions by family ---
cols_diffs = ["d_mp1_vs_mean", "d_mp2_vs_mean", "d_v2_vs_v1"]

# Per family, the share of rows whose difference is strictly positive,
# computed separately for each column in cols_diffs.
wins_by_family = df_ab.groupby("familia")[cols_diffs].apply(
    lambda grp: (grp > 0).mean()
)
# Long format (one row per family/comparison pair) so the comparison can be
# mapped to `hue` in the bar plot.
prop_fam = wins_by_family.reset_index().melt(
    id_vars="familia", var_name="comparacion", value_name="prop_victorias"
)

fig_fam, ax_fam = plt.subplots(figsize=(12, 6))
sns.barplot(
    data=prop_fam, x="familia", y="prop_victorias", hue="comparacion", ax=ax_fam
)
# Horizontal reference at 0.5 (half of the cases).
ax_fam.axhline(0.5, color="red", linestyle="--")
ax_fam.set_title("Proporción de victorias por familia")
ax_fam.set_ylabel("Proporción de casos donde Δ > 0")
ax_fam.set_ylim(0, 1)
# Rotate the existing x tick labels.
for tick_label in ax_fam.get_xticklabels():
    tick_label.set_rotation(45)
ax_fam.legend(title="Comparación")
plt.show()


In [None]:
# --- 4. Heatmap of wins by (family x outliers) ---
# For each (familia, tuvo_outliers) pair: share of rows where d_v2_vs_v1 is
# strictly positive. unstack() then moves tuvo_outliers into the columns.
grouped_diff = df_ab.groupby(["familia", "tuvo_outliers"])["d_v2_vs_v1"]
heat_data = grouped_diff.apply(lambda diffs: (diffs > 0).mean()).unstack()

plt.figure(figsize=(8, 6))
# sns.heatmap draws on the current axes and returns them, so the labels
# below are set directly on that Axes object.
ax_heat = sns.heatmap(
    heat_data,
    annot=True,
    fmt=".2f",
    cmap="YlGnBu",
    cbar_kws={'label': 'Proporción V2 > V1'},
)
ax_heat.set_title("Proporción V2 > V1 por familia y presencia de outliers")
ax_heat.set_ylabel("Familia")
ax_heat.set_xlabel("Tuvo Outliers")
plt.show()
