In [9]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind
from statsmodels.stats.power import NormalIndPower
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import gradio as gr

# 1. Generate Synthetic Dataset
def generate_data(n=5000, seed=42):
    """Simulate an email campaign with one row per recipient.

    Each user is assigned uniformly at random to one of four variants
    (A-D) and gets a random subject line, hourly send time on 2025-07-01,
    device and region. The engagement funnel is nested: a user opens with
    probability ``0.10 + lift[variant]``, clicks with probability 0.30 only
    if they opened, and converts with probability 0.40 only if they clicked.
    Spam flags (0.2%) and unsubscribes (0.5%) are drawn independently of the
    variant.

    Parameters
    ----------
    n : int
        Number of users (rows) to generate.
    seed : int or None
        Value passed to ``np.random.seed``. The default of 42 matches the
        previously hard-coded seed.

    Returns
    -------
    pandas.DataFrame
        Columns: user_id, variant, subject_line, send_time, device, region,
        opened, clicked, converted, spam_flag, unsubscribed.

    Notes
    -----
    This reseeds NumPy's *global* random state as a side effect, so later
    ``np.random`` calls in the same session are affected by it.
    """
    np.random.seed(seed)
    variants = list("ABCD")
    df = pd.DataFrame({
        "user_id": range(1, n + 1),
        "variant": np.random.choice(variants, n),
        "subject_line": np.random.choice(["Sale 🔥", "New Arrivals", "Just For You", "Limited Time"], n),
        # Lower-case "h" is the hourly alias; upper-case "H" is deprecated in recent pandas.
        "send_time": np.random.choice(pd.date_range("2025-07-01", periods=24, freq="h"), n),
        "device": np.random.choice(["mobile", "desktop", "tablet"], n, p=[0.6, 0.35, 0.05]),
        "region": np.random.choice(["Tier-1", "Tier-2", "Tier-3"], n, p=[0.4, 0.4, 0.2]),
    })
    lift = {"A": 0.00, "B": 0.03, "C": 0.07, "D": 0.02}

    # One vectorized draw per funnel stage instead of a Python-level call per row.
    open_prob = 0.10 + df["variant"].map(lift).to_numpy(dtype=float)
    df["opened"] = np.random.binomial(1, open_prob)
    # Multiplying by the previous stage forces probability 0 for users who dropped out.
    df["clicked"] = np.random.binomial(1, 0.30 * df["opened"].to_numpy())
    df["converted"] = np.random.binomial(1, 0.40 * df["clicked"].to_numpy())
    df["spam_flag"] = np.random.binomial(1, 0.002, n)
    df["unsubscribed"] = np.random.binomial(1, 0.005, n)
    return df

# Module-level dataset built with generate_data's defaults; dashboard() reads this global.
df = generate_data()

# 2. Frequentist A/B Testing
def frequentist_test(df, metric="converted"):
    """Run a two-sample t-test on ``metric`` for every pair of variants.

    Variants are taken in sorted order and each unordered pair is tested
    once with ``scipy.stats.ttest_ind`` (default settings). No correction
    for multiple comparisons is applied.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``variant`` column and the ``metric`` column.
    metric : str
        Name of the column to compare between variants.

    Returns
    -------
    pandas.DataFrame
        One row per pair with columns ``pair`` (``"A vs B"``), ``p`` and
        ``t``. The three columns are always present, even when there are
        fewer than two variants and the frame therefore has no rows.
    """
    # Split the metric by variant once instead of re-filtering the frame for every pair.
    samples = {variant: values for variant, values in df.groupby("variant")[metric]}
    labels = list(samples)

    results = []
    for i, a in enumerate(labels):
        for b in labels[i + 1:]:
            t, p = ttest_ind(samples[a], samples[b])
            results.append({"pair": f"{a} vs {b}", "p": p, "t": t})

    # Fixing the columns keeps `result["p"]` valid for callers when no pair exists.
    return pd.DataFrame(results, columns=["pair", "p", "t"])

# 3. Bayesian Posterior
def bayesian_posteriors(df, metric="converted"):
    """Return Beta posterior parameters for ``metric`` in every variant.

    Starting from a Beta(1, 1) prior, each variant's posterior is
    ``(1 + successes, 1 + failures)``, where successes is the sum of the
    metric column in that group and failures is the group size minus that sum.

    Returns
    -------
    dict
        Maps variant label to an ``(alpha, beta)`` tuple.
    """
    def _beta_params(outcomes):
        # Sum of the 0/1 outcomes counts the successes; the rest of the group failed.
        successes = outcomes.sum()
        failures = len(outcomes) - successes
        return (1 + successes, 1 + failures)

    return {
        variant: _beta_params(outcomes)
        for variant, outcomes in df.groupby("variant")[metric]
    }

def probability_superior(post, a, b, sims=10000):
    """Estimate P(rate of ``b`` > rate of ``a``) by Monte Carlo sampling.

    ``post`` maps variant labels to ``(alpha, beta)`` Beta parameters.
    ``sims`` draws are taken for ``a`` and then for ``b`` from NumPy's global
    random state; the result is the fraction of draws where ``b`` is larger.
    """
    alpha_a, beta_a = post[a]
    alpha_b, beta_b = post[b]
    # Draw order (a first, then b) is kept so seeded runs stay reproducible.
    draws_a = np.random.beta(alpha_a, beta_a, sims)
    draws_b = np.random.beta(alpha_b, beta_b, sims)
    b_wins = draws_b > draws_a
    return float(b_wins.mean())

# 4. Auto Winner
def auto_winner(df, metric="converted", alpha=0.05, min_users=500):
    """Pick a winning variant, or return ``None`` when no call can be made.

    The candidate is the variant with the highest mean of ``metric``. It is
    declared the winner only if every variant has at least ``min_users``
    users and the candidate's t-test against *each other variant* has
    ``p < alpha``. Comparisons between two non-leading variants are ignored:
    two losers being indistinguishable from each other says nothing about
    whether the leader beats them.

    Relies on ``frequentist_test`` returning a DataFrame with ``pair``
    (``"X vs Y"``) and ``p`` columns.

    Returns
    -------
    The winning variant label, or ``None``.
    """
    counts = df["variant"].value_counts()
    # A winner needs at least one challenger and enough traffic in every arm.
    if len(counts) < 2 or (counts < min_users).any():
        return None

    best = df.groupby("variant")[metric].mean().idxmax()
    ft = frequentist_test(df, metric)

    # The leader can sit on either side of the "X vs Y" label.
    rivals = [v for v in counts.index if v != best]
    leader_pairs = {f"{best} vs {v}" for v in rivals} | {f"{v} vs {best}" for v in rivals}
    vs_leader = ft[ft["pair"].isin(leader_pairs)]

    # Require one comparison per rival so a missing test can never pass vacuously.
    if len(vs_leader) == len(rivals) and (vs_leader["p"] < alpha).all():
        return best
    return None

# 5. Power Calculation
def power_calc(p1, p2, alpha=0.05, power=0.8):
    """Sample size per group needed to detect a change from rate p1 to p2.

    The effect size is the absolute difference of the two rates divided by
    the square root of the average of their Bernoulli variances; it is
    passed to ``NormalIndPower.solve_power`` and the result is rounded up.

    Parameters
    ----------
    p1, p2 : float
        Baseline and target rates, each within [0, 1] and not equal.
    alpha : float
        Significance level handed to ``solve_power``.
    power : float
        Desired power handed to ``solve_power``.

    Returns
    -------
    int
        Required number of observations per group.

    Raises
    ------
    ValueError
        If a rate is outside [0, 1] (or NaN), the rates are equal, or both
        rates have zero variance.
    """
    for label, rate in (("p1", p1), ("p2", p2)):
        # The negated chained comparison is also true for NaN, so NaN is rejected.
        if not 0 <= rate <= 1:
            raise ValueError(f"{label} must be a probability in [0, 1], got {rate!r}")
    if p1 == p2:
        raise ValueError("p1 and p2 must differ: an effect size of 0 cannot be detected")

    pooled_sd = np.sqrt((p1 * (1 - p1) + p2 * (1 - p2)) / 2)
    if pooled_sd == 0:
        # Only reachable when the rates are 0 and 1: the effect size would be infinite.
        raise ValueError("p1 and p2 both have zero variance; effect size is undefined")

    es = abs(p2 - p1)
    analysis = NormalIndPower()
    n = analysis.solve_power(effect_size=es / pooled_sd, alpha=alpha, power=power)
    return int(np.ceil(n))

# 6. ML Segmentation
def segment_uplift(df, features, metric="converted", max_depth=4, random_state=0):
    """Fit a shallow decision tree on ``features`` and report its in-sample fit.

    The feature columns are one-hot encoded (first level dropped), a
    ``DecisionTreeClassifier`` is trained to predict ``metric``, and the
    text ``classification_report`` is computed on the same rows the tree was
    fitted on, so it describes training fit rather than held-out accuracy.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    features : list of str
        Columns used as predictors.
    metric : str
        Target column.
    max_depth : int
        Maximum tree depth (default 4, the previously hard-coded value).
    random_state : int or None
        Seed for the tree so that ties between equally good splits are
        broken the same way on every run.

    Returns
    -------
    str
        The formatted classification report.
    """
    X = pd.get_dummies(df[features], drop_first=True)
    y = df[metric]
    model = DecisionTreeClassifier(max_depth=max_depth, random_state=random_state).fit(X, y)
    # zero_division=0 silences the warning when a class is never predicted.
    return classification_report(y, model.predict(X), zero_division=0)

# 7. Plot Dashboard
def build_plot(df):
    """Return a grouped bar chart of open, click and conversion rates per variant."""
    metrics = ["opened", "clicked", "converted"]
    rates = df.groupby("variant")[metrics].mean().reset_index()

    fig = make_subplots(rows=1, cols=1)
    # One bar series per funnel metric, all placed in the single subplot cell.
    for metric in metrics:
        bars = go.Bar(name=metric.capitalize(), x=rates["variant"], y=rates[metric])
        fig.add_trace(bars, row=1, col=1)

    fig.update_layout(
        barmode="group",
        title="📊 Engagement Metrics by Variant",
        yaxis_title="Rate",
        xaxis_title="Variant",
        height=400,
    )
    return fig

# 8. Main Dashboard Logic
def dashboard():
    """Gradio callback: run every analysis on the global ``df`` and render it.

    Returns
    -------
    tuple
        ``(figure, html)``: the grouped bar chart from ``build_plot`` and an
        HTML report with group means, pairwise t-tests, P(B > A), the
        auto-selected winner, guardrail rates (multiplied by 100), the sample
        size needed to detect a +0.03 absolute lift over variant A's
        conversion rate, and the decision-tree classification report.
    """
    freq_df = frequentist_test(df)
    post = bayesian_posteriors(df)
    bayes_prob = probability_superior(post, "A", "B")
    winner = auto_winner(df)
    guard_rails = df.groupby("variant")[["spam_flag", "unsubscribed"]].mean() * 100

    # Compute the baseline once and reuse it for the +0.03 target rate.
    baseline_rate = df[df.variant == "A"]["converted"].mean()
    needed_n = power_calc(baseline_rate, baseline_rate + 0.03)
    seg_report = segment_uplift(df, ["variant", "device", "region"])

    group_means = df.groupby("variant")[["opened", "clicked", "converted"]].mean()

    html = (
        "<h4>📌 Group Means</h4>"
        # Four decimals: conversion rates are around 0.01-0.02, so two decimals
        # would round different variants to the same displayed value.
        + group_means.to_html(float_format="%.4f")
        + "<br><h4>🧪 Frequentist Results</h4>"
        + freq_df.to_html(index=False, float_format="%.4f")
        + f"<br><h4>🔍 Bayesian</h4>P(B > A) = {bayes_prob:.3f}"
        + f"<br><h4>🏆 Auto-Winner</h4>{winner or '—'}"
        + "<br><h4>🚧 Guardrails</h4>"
        + guard_rails.to_html(float_format="%.2f")
        + f"<br><h4>📈 Power Calculation</h4>Approx. {needed_n} users needed per variant"
        + "<br><h4>🧠 ML Segmentation Report</h4><pre>" + seg_report + "</pre>"
    )
    return build_plot(df), html

# 9. Gradio App Launch
# dashboard() takes no inputs and returns (figure, html), matching the two outputs below.
demo = gr.Interface(
    fn=dashboard,
    inputs=[],
    outputs=[
        gr.Plot(label="📊 Variant Metrics"),
        gr.HTML(label="📋 Insights & Reports"),
    ],
    title="📧 Email A/B/n Testing Dashboard (with All Features)",
    description="Includes subject line testing, Bayesian stats, auto-winner, segmentation, and more.",
)
# share=False: do not request a public share link.
demo.launch(share=False)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.
* To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>



In [11]:
!pip install gradio plotly pandas numpy scipy scikit-learn statsmodels




In [12]:
pip install gradio plotly pandas numpy scipy scikit-learn statsmodels




In [16]:
# 📧 Email A/B/n Testing Project – Final Version with Gradio (All Features)

import base64
import sys
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.stats import ttest_ind
from statsmodels.stats.power import NormalIndPower
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import gradio as gr

# 1. Generate Data
def generate_data(n=5000, seed=42):
    """Generate a synthetic A/B/C/D email-campaign dataset of ``n`` users.

    Variant, subject line, send hour (one of 24 hourly slots on 2025-07-01),
    device and region are sampled per user. Engagement follows a funnel:
    open probability is ``0.10 + lift[variant]``; a click (p=0.30) requires
    an open; a conversion (p=0.40) requires a click. ``spam_flag`` (p=0.002)
    and ``unsubscribed`` (p=0.005) are drawn independently of everything else.

    Parameters
    ----------
    n : int
        Number of rows to generate.
    seed : int or None
        Passed to ``np.random.seed``; the default 42 is the value that was
        previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per user with the columns described above plus ``user_id``.

    Notes
    -----
    Reseeds NumPy's global random state as a side effect.
    """
    np.random.seed(seed)
    variants = list("ABCD")
    df = pd.DataFrame({
        "user_id": range(1, n + 1),
        "variant": np.random.choice(variants, n),
        "subject_line": np.random.choice(["Sale 🔥", "New Arrivals", "Just For You", "Limited Time"], n),
        # "h" is the hourly alias; the upper-case "H" spelling is deprecated in recent pandas.
        "send_time": np.random.choice(pd.date_range("2025-07-01", periods=24, freq="h"), n),
        "device": np.random.choice(["mobile", "desktop", "tablet"], n, p=[0.60, 0.35, 0.05]),
        "region": np.random.choice(["Tier-1", "Tier-2", "Tier-3"], n, p=[0.40, 0.40, 0.20]),
    })
    lift = {"A": 0.00, "B": 0.03, "C": 0.07, "D": 0.02}

    # Vectorized draws: one call per funnel stage rather than one per row.
    open_prob = 0.10 + df["variant"].map(lift).to_numpy(dtype=float)
    df["opened"]       = np.random.binomial(1, open_prob)
    # A zero in the previous stage makes the probability 0, which keeps the funnel nested.
    df["clicked"]      = np.random.binomial(1, 0.30 * df["opened"].to_numpy())
    df["converted"]    = np.random.binomial(1, 0.40 * df["clicked"].to_numpy())
    df["spam_flag"]    = np.random.binomial(1, 0.002, n)
    df["unsubscribed"] = np.random.binomial(1, 0.005, n)
    return df

# Module-level dataset built with generate_data's defaults; dashboard() and the CSV export read this global.
df = generate_data()

# 2. Statistical Tests
def frequentist_test(df, metric="converted"):
    """Return pairwise t-test p-values between variants as an HTML snippet.

    Variants are sorted, every unordered pair is compared once with
    ``ttest_ind`` on ``metric``, and the lines ``"A vs B: p=0.1234"`` are
    joined with ``<br>``.
    """
    labels = sorted(df["variant"].unique())
    lines = []
    for idx, first in enumerate(labels):
        # Pair each variant only with those after it, so every pair appears once.
        for second in labels[idx + 1:]:
            sample_first = df.loc[df.variant == first, metric]
            sample_second = df.loc[df.variant == second, metric]
            _, p_value = ttest_ind(sample_first, sample_second)
            lines.append(f"{first} vs {second}: p={p_value:.4f}")
    return "<br>".join(lines)

def bayesian_prob(df, a="A", b="B", metric="converted", sims=10000):
    """Monte Carlo estimate of P(rate of variant ``b`` > rate of variant ``a``).

    Each variant gets a Beta(1 + successes, 1 + failures) posterior built
    from its *observed* counts of ``metric``, so the posterior width reflects
    the actual group size. ``sims`` samples are drawn for ``a`` and then for
    ``b`` from NumPy's global random state.

    Returns
    -------
    numpy.float64
        Fraction of paired draws in which ``b``'s sample exceeds ``a``'s.
    """
    def _posterior(variant):
        outcomes = df[df.variant == variant][metric]
        successes = outcomes.sum()
        failures = len(outcomes) - successes
        # Uniform Beta(1, 1) prior updated with the real success/failure counts.
        return 1 + successes, 1 + failures

    alpha_a, beta_a = _posterior(a)
    alpha_b, beta_b = _posterior(b)
    samples_a = np.random.beta(alpha_a, beta_a, sims)
    samples_b = np.random.beta(alpha_b, beta_b, sims)
    return np.mean(samples_b > samples_a)

def power_calc(df, delta=0.03, baseline="A", metric="converted", alpha=0.05, power=0.8):
    """Users per variant needed to detect an absolute lift of ``delta``.

    The baseline rate is the mean of ``metric`` in the ``baseline`` variant
    and the target rate is baseline + ``delta``. The effect size is the
    absolute difference divided by the square root of the average Bernoulli
    variance of the two rates; ``NormalIndPower.solve_power`` turns it into
    a sample size, which is rounded up.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with ``variant`` and ``metric`` columns.
    delta : float
        Absolute change in rate to detect (must be non-zero).
    baseline : str
        Variant used as the reference (default "A", the previous fixed value).
    metric : str
        Column holding the 0/1 outcome (default "converted").
    alpha, power : float
        Significance level and desired power (defaults 0.05 and 0.8, the
        previously hard-coded values).

    Raises
    ------
    ValueError
        If the baseline variant has no data, a rate falls outside [0, 1],
        ``delta`` is 0, or both rates have zero variance.
    """
    p1 = df[df.variant == baseline][metric].mean()
    p2 = p1 + delta
    # The negated chained comparison is also true for NaN (empty baseline group).
    if not 0 <= p1 <= 1:
        raise ValueError(f"no valid baseline rate for variant {baseline!r} (got {p1!r})")
    if not 0 <= p2 <= 1:
        raise ValueError(f"baseline + delta must lie in [0, 1], got {p2!r}")
    if p1 == p2:
        raise ValueError("delta must be non-zero: an effect size of 0 cannot be detected")

    pooled_sd = np.sqrt((p1 * (1 - p1) + p2 * (1 - p2)) / 2)
    if pooled_sd == 0:
        # Only reachable for rates 0 and 1, where the effect size would be infinite.
        raise ValueError("both rates have zero variance; effect size is undefined")

    effect_size = abs(p2 - p1) / pooled_sd
    analysis = NormalIndPower()
    n = analysis.solve_power(effect_size=effect_size, alpha=alpha, power=power)
    return int(np.ceil(n))

def segmentation_report(df, random_state=0):
    """Fit a depth-4 decision tree on variant/device/region and report its fit.

    The three categorical columns are one-hot encoded (first level dropped)
    and used to predict ``converted``. The returned text report is computed
    on the same rows the tree was trained on, so it reflects in-sample fit.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with ``variant``, ``device``, ``region`` and ``converted`` columns.
    random_state : int or None
        Seed for the tree, so that ties between equally good splits resolve
        identically on every run.

    Returns
    -------
    str
        Output of ``classification_report``.
    """
    X = pd.get_dummies(df[["variant", "device", "region"]], drop_first=True)
    y = df["converted"]
    model = DecisionTreeClassifier(max_depth=4, random_state=random_state).fit(X, y)
    # zero_division=0 silences the warning when a class is never predicted.
    return classification_report(y, model.predict(X), zero_division=0)

# 3. Plotting
def plot_metrics(df):
    """Return a grouped bar chart of open, click and conversion rates per variant."""
    funnel_cols = ["opened", "clicked", "converted"]
    rates = df.groupby("variant")[funnel_cols].mean().reset_index()

    fig = make_subplots(rows=1, cols=1)
    # One bar series per funnel metric, legend-labelled with the capitalised column name.
    for metric in funnel_cols:
        bars = go.Bar(x=rates["variant"], y=rates[metric], name=metric.capitalize())
        fig.add_trace(bars)

    fig.update_layout(
        barmode="group",
        title="📊 Engagement Metrics",
        xaxis_title="Variant",
        yaxis_title="Rate",
    )
    return fig

# 4. Gradio Dashboard Callback
def dashboard():
    """Gradio callback: analyse the global ``df`` and return ``(figure, html)``.

    The HTML report contains the per-variant open/click/conversion rates,
    pairwise t-test p-values, the Bayesian P(B > A), the required sample
    size for a +3% lift, the decision-tree classification report, and a
    download link for the dataset.

    The link embeds the CSV as a base64 ``data:`` URI, so it does not depend
    on any file being present on disk or served by the app.
    """
    group_means = df.groupby("variant")[["opened", "clicked", "converted"]].mean()

    # UTF-8 + base64 keeps non-ASCII content (e.g. the emoji subject line) intact in the URI.
    csv_b64 = base64.b64encode(df.to_csv(index=False).encode("utf-8")).decode("ascii")

    html = (
        "<h4>📌 Group Conversion Metrics</h4>"
        # Four decimals: conversion rates are around 0.01-0.02, so two decimals
        # would round different variants to the same displayed value.
        + group_means.to_html(float_format="%.4f")
        + "<br><h4>🧪 Frequentist Test Results</h4>" + frequentist_test(df)
        + f"<br><h4>🔍 Bayesian P(B > A)</h4> ≈ {bayesian_prob(df):.3f}"
        + f"<br><h4>📈 Power Calculation</h4>Required: ~{power_calc(df)} users/variant for +3% lift"
        + "<br><h4>🧠 Segmentation Report</h4><pre>" + segmentation_report(df) + "</pre>"
        + "<br><a href='data:text/csv;charset=utf-8;base64," + csv_b64
        + "' download='email_campaign_data.csv'>📩 Download Dataset CSV</a>"
    )
    return plot_metrics(df), html

# 5. Save CSV for Download
# Write to the Google Drive folder when it exists; otherwise fall back to the
# working directory so this cell also runs without Drive mounted or outside Colab.
_drive_dir = Path("/content/drive/MyDrive")
csv_path = (_drive_dir if _drive_dir.is_dir() else Path.cwd()) / "email_campaign_data.csv"
df.to_csv(csv_path, index=False)

# 6. Auto-detect if running in Google Colab
# True when the `google.colab` module has already been imported into this interpreter.
share_flag = "google.colab" in sys.modules

# 7. Build the Gradio app, then launch it
# dashboard() takes no inputs and returns (figure, html), matching the two outputs below.
demo = gr.Interface(
    fn=dashboard,
    inputs=[],
    outputs=[
        gr.Plot(label="📊 Metrics Chart"),
        gr.HTML(label="📋 Results & Insights"),
    ],
    title="📧 Email A/B/n Testing Dashboard",
    description="All-in-one Gradio dashboard: Frequentist & Bayesian A/B/n test, segmentation, power calc, CTR/Open/Conversion rate.",
)
# A public share link is requested only when share_flag is True.
demo.launch(share=share_flag, show_error=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://793d8483501737fe0d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


