## Phase 3: A/B Experimentation & Statistical Testing.

Objectives in Phase 3:

1.Simulate an A/B test (e.g., different subscription offers).

2.Run t-tests and Bayesian analysis to see if results are significant.

3.Create clear charts to show uplift in metrics.

4.Export a clean PDF report.

In [2]:
# ab_experiment_analysis.py
"""
A/B experiment simulation & analysis using your flo_sports_customers.csv
Saves charts to ./charts/ and report to FloSports_AB_Test_Report.pdf
"""

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from statsmodels.stats.proportion import proportions_ztest
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib import colors

# -------------------------
# Config
# -------------------------
CSV_PATH = "flo_sports_customers.csv"
OUT_PDF = "FloSports_AB_Test_Report.pdf"
CHARTS_DIR = "charts"
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
os.makedirs(CHARTS_DIR, exist_ok=True)

# -------------------------
# 1) Load data
# -------------------------
df = pd.read_csv(CSV_PATH)

# Ensure we have a segment column (accept a few possible names)
segment_col = None
for candidate in ["SegmentName", "Segment", "segment"]:
    if candidate in df.columns:
        segment_col = candidate
        break

if segment_col is None:
    # if no segment exists, create one by simple kmeans-like split of tenure/watch
    # but to keep this simple, create random segments (this should be replaced by your real segments)
    df["segment"] = np.random.choice(["Loyal", "At Risk", "New", "YoungEngaged"], size=len(df))
    segment_col = "segment"

# Ensure churn column exists and binary 0/1
if "Churned" not in df.columns:
    # create a simulated churn flag if missing (10-30% baseline)
    df["Churned"] = np.random.binomial(1, 0.18, size=len(df))

# Ensure ARPU-like column exists; if not, simulate MonthlySpend using AvgMonthlyWatchHours
if "MonthlySpend" not in df.columns:
    # assume price per watch hour baseline plus noise
    if "AvgMonthlyWatchHours" in df.columns:
        base = df["AvgMonthlyWatchHours"].fillna(df["AvgMonthlyWatchHours"].mean()) * 0.8
    else:
        base = np.random.uniform(5, 20, size=len(df))
    df["MonthlySpend"] = np.clip(base + np.random.normal(0, 5, size=len(df)), 0.5, None)

# -------------------------
# 2) Create A/B groups within each segment
# -------------------------
def assign_ab_within_segment(df, seg_col="segment", frac_b=0.5, seed=RANDOM_SEED):
    df = df.copy()
    df["AB"] = np.nan
    rng = np.random.default_rng(seed)
    for seg, idx in df.groupby(seg_col).groups.items():
        ids = list(df.loc[idx].index)
        rng.shuffle(ids)
        cut = int(len(ids) * frac_b)
        df.loc[ids[:cut], "AB"] = "B"
        df.loc[ids[cut:], "AB"] = "A"
    return df

df = assign_ab_within_segment(df, seg_col=segment_col)

# -------------------------
# 3) Simulate treatment effect for group B
#    - Increase retention (lower churn) by some delta per segment (realistic varying effect)
#    - Increase MonthlySpend (ARPU) by a small percent for B
# -------------------------
# Base churn probability estimated from existing 'Churned'
base_churn_by_seg = df.groupby(segment_col)["Churned"].mean().to_dict()

# For the simulation: we will create 'Churned_post' and 'MonthlySpend_post'
df["Churned_post"] = df["Churned"].copy()
df["MonthlySpend_post"] = df["MonthlySpend"].copy()

# Treatment effect config (you can tweak magnitudes)
# For realism: effect is larger on 'At Risk' and 'New', smaller for 'Loyal'
treatment_effect_churn_reduction = {
    # segment_name: absolute reduction in churn probability for group B
    # if segment unseen here, default below will apply
    "At Risk": 0.08,
    "At-Risk": 0.08,
    "New": 0.10,
    "Loyal": 0.02,
    "YoungEngaged": 0.04,
    "Churned": 0.00,
    "Default": 0.05
}
treatment_effect_arpu_pct = {
    # percent increase in ARPU for group B
    "At Risk": 0.08,
    "At-Risk": 0.08,
    "New": 0.12,
    "Loyal": 0.02,
    "YoungEngaged": 0.05,
    "Churned": 0.00,
    "Default": 0.05
}

# Apply simulation: for A group, keep baseline; for B group, flip churn with reduced probability and raise spend
for seg, idx in df.groupby(segment_col).groups.items():
    seg_rows = df.loc[idx]
    # baseline churn rate
    baseline = base_churn_by_seg.get(seg, seg_rows["Churned"].mean())
    red = treatment_effect_churn_reduction.get(seg, treatment_effect_churn_reduction["Default"])
    arpu_pct = treatment_effect_arpu_pct.get(seg, treatment_effect_arpu_pct["Default"])
    # for B group: re-simulate churn outcome using reduced churn prob
    b_idx = seg_rows[seg_rows["AB"] == "B"].index
    a_idx = seg_rows[seg_rows["AB"] == "A"].index
    if len(b_idx) > 0:
        new_churn_prob = max(0.0, baseline - red)
        df.loc[b_idx, "Churned_post"] = np.random.binomial(1, new_churn_prob, size=len(b_idx))
        # ARPU uplift
        df.loc[b_idx, "MonthlySpend_post"] = df.loc[b_idx, "MonthlySpend"] * (1 + arpu_pct) \
                                             + np.random.normal(0, 1, size=len(b_idx))
    # For A group we keep earlier churn but add small noise to monthly spend
    if len(a_idx) > 0:
        df.loc[a_idx, "MonthlySpend_post"] = df.loc[a_idx, "MonthlySpend"] + np.random.normal(0, 1, size=len(a_idx))
        # optionally, we can resample A churn with same baseline (to simulate trial period)
        df.loc[a_idx, "Churned_post"] = np.random.binomial(1, baseline, size=len(a_idx))

# Clip monthly spend to positive
df["MonthlySpend_post"] = df["MonthlySpend_post"].clip(lower=0.01)

# -------------------------
# 4) Aggregate test metrics (overall & by segment)
# -------------------------
def summarize(df, by=None):
    if by is None:
        groups = [None]
    else:
        groups = df[by].unique()
    rows = []
    if by is None:
        # overall
        for ab in ["A", "B"]:
            sub = df[df["AB"] == ab]
            customers = len(sub)
            churn_rate = sub["Churned_post"].mean()
            arpu = sub["MonthlySpend_post"].mean()
            rows.append({"Group": ab, "Customers": customers, "ChurnRate": churn_rate, "ARPU": arpu})
        return pd.DataFrame(rows)
    else:
        for g in groups:
            for ab in ["A", "B"]:
                sub = df[(df["AB"] == ab) & (df[by] == g)]
                customers = len(sub)
                churn_rate = sub["Churned_post"].mean() if customers > 0 else np.nan
                arpu = sub["MonthlySpend_post"].mean() if customers > 0 else np.nan
                rows.append({by: g, "Group": ab, "Customers": customers, "ChurnRate": churn_rate, "ARPU": arpu})
        return pd.DataFrame(rows)

overall_summary = summarize(df, by=None)
by_segment_summary = summarize(df, by=segment_col)

# -------------------------
# 5) Frequentist tests
#    - Retention: two-sample proportions z-test (A vs B)
#    - ARPU: two-sample t-test (Welch)
# -------------------------
# Retention counts
successes = df.groupby("AB")["Churned_post"].apply(lambda x: (1 - x).sum())  # stayed = 1 - churn
nobs = df.groupby("AB")["Churned_post"].count()
# proportions_ztest expects counts of successes
try:
    stat, pval_prop = proportions_ztest(count=successes.values, nobs=nobs.values, alternative='smaller')
    # I set alternative='smaller' because we expect A's success < B's success? We'll just report two-sided too
except Exception:
    stat, pval_prop = proportions_ztest(count=successes.values, nobs=nobs.values)
# t-test for ARPU (post)
arpu_a = df[df["AB"] == "A"]["MonthlySpend_post"]
arpu_b = df[df["AB"] == "B"]["MonthlySpend_post"]
t_stat, pval_arpu = stats.ttest_ind(arpu_a, arpu_b, equal_var=False, nan_policy='omit')

# Chi-square contingency for churn counts as alternative
contingency = pd.crosstab(df["AB"], df["Churned_post"])
chi2_stat, chi2_p, _, _ = stats.chi2_contingency(contingency)

# -------------------------
# 6) Bayesian analyses
#    - Retention: Beta posterior (Beta(1,1) prior)
#    - ARPU: Bayesian bootstrap for difference in means
# -------------------------
# Retention beta posterior
# successes = stayed counts
success_A = int((1 - df[df["AB"] == "A"]["Churned_post"]).sum())
n_A = int(df[df["AB"] == "A"]["Churned_post"].count())
success_B = int((1 - df[df["AB"] == "B"]["Churned_post"]).sum())
n_B = int(df[df["AB"] == "B"]["Churned_post"].count())

alpha0, beta0 = 1, 1
alpha_A, beta_A = alpha0 + success_A, beta0 + (n_A - success_A)
alpha_B, beta_B = alpha0 + success_B, beta0 + (n_B - success_B)

# sample posteriors
n_samps = 50000
post_A = np.random.beta(alpha_A, beta_A, size=n_samps)
post_B = np.random.beta(alpha_B, beta_B, size=n_samps)
prob_B_better_retention = (post_B > post_A).mean()
retention_diff_samples = post_B - post_A

# ARPU Bayesian bootstrap
def bayesian_bootstrap_diff(x, y, niter=20000):
    # Bayesian bootstrap: sample weights from Dirichlet
    x = np.array(x)
    y = np.array(y)
    nx, ny = len(x), len(y)
    diffs = np.empty(niter)
    for i in range(niter):
        wx = np.random.dirichlet(np.ones(nx))
        wy = np.random.dirichlet(np.ones(ny))
        diffs[i] = (wx @ x) - (wy @ y)
    return diffs

bb_diffs = bayesian_bootstrap_diff(arpu_a.values, arpu_b.values, niter=20000)
prob_B_better_arpu = (bb_diffs > 0).mean()

# -------------------------
# 7) Plots
# -------------------------
sns.set_style("whitegrid")
plt.rcParams.update({"figure.dpi": 150})

# Retention rates A vs B
plt.figure(figsize=(6,4))
rates = [(1 - df[df["AB"] == "A"]["Churned_post"]).mean(), (1 - df[df["AB"] == "B"]["Churned_post"]).mean()]
sns.barplot(x=["A (control)", "B (treatment)"], y=rates)
plt.title("Retention Rate: A vs B (post-treatment)")
plt.ylabel("Retention rate")
plt.tight_layout()
retention_chart = os.path.join(CHARTS_DIR, "retention_rate_ab.png")
plt.savefig(retention_chart)
plt.close()

# ARPU boxplots
plt.figure(figsize=(7,4))
sns.boxplot(x="AB", y="MonthlySpend_post", data=df)
plt.title("ARPU (MonthlySpend) by Group (post)")
plt.tight_layout()
arpu_chart = os.path.join(CHARTS_DIR, "arpu_boxplot.png")
plt.savefig(arpu_chart)
plt.close()

# Posterior distributions for retention
plt.figure(figsize=(7,4))
sns.kdeplot(post_A, label="Posterior Retention A", fill=True)
sns.kdeplot(post_B, label="Posterior Retention B", fill=True)
plt.legend()
plt.title("Posterior Distributions: Retention (Beta posteriors)")
plt.tight_layout()
retention_post_chart = os.path.join(CHARTS_DIR, "retention_posteriors.png")
plt.savefig(retention_post_chart)
plt.close()

# Posterior diffs ARPU (bayesian bootstrap)
plt.figure(figsize=(7,4))
sns.kdeplot(bb_diffs, fill=True)
plt.axvline(0, linestyle="--", color="k")
plt.title("Posterior (Bayesian Bootstrap) Distribution of ARPU difference (B - A)")
plt.tight_layout()
arpu_post_chart = os.path.join(CHARTS_DIR, "arpu_posterior_bootstrap.png")
plt.savefig(arpu_post_chart)
plt.close()

# -------------------------
# 8) Save experimental dataset + summaries
# -------------------------
df.to_csv("flo_ab_experiment_simulated.csv", index=False)
overall_summary.to_csv("flo_ab_overall_summary.csv", index=False)
by_segment_summary.to_csv("flo_ab_by_segment_summary.csv", index=False)

# -------------------------
# 9) Build PDF report
# -------------------------
styles = getSampleStyleSheet()
doc = SimpleDocTemplate(OUT_PDF, pagesize=A4, rightMargin=36, leftMargin=36, topMargin=36, bottomMargin=36)
story = []

# Title & exec summary
story.append(Paragraph("FloSports — A/B Experiment Report", styles["Title"]))
story.append(Spacer(1, 8))

exec_summary = f"""
We simulated an A/B test by randomizing customers within segments into groups A (control) and B (treatment).
Treatment B was simulated to reduce churn (increase retention) and raise ARPU slightly in targeted segments.
Overall sample sizes: A = {n_A}, B = {n_B}.
"""
story.append(Paragraph(exec_summary, styles["Normal"]))
story.append(Spacer(1, 8))

# Frequentist results
story.append(Paragraph("Frequentist Test Results", styles["Heading2"]))
freq_text = f"""
Retention (proportions z-test): stat = {stat:.3f}, p-value = {pval_prop:.4f}
Chi-square for churn contingency: chi2 = {chi2_stat:.3f}, p = {chi2_p:.4f}

ARPU (Welch t-test): t = {t_stat:.3f}, p-value = {pval_arpu:.4f}
"""
story.append(Paragraph(freq_text, styles["Normal"]))
story.append(Spacer(1, 8))

# Bayesian results
story.append(Paragraph("Bayesian Analysis (probabilities)", styles["Heading2"]))
bayes_text = f"""
Probability(B has higher retention than A) ≈ {prob_B_better_retention:.3f}
Probability(B has higher ARPU than A) ≈ {prob_B_better_arpu:.3f}
"""
story.append(Paragraph(bayes_text, styles["Normal"]))
story.append(Spacer(1, 8))

# Add charts
story.append(Paragraph("Retention Rate (A vs B)", styles["Heading2"]))
story.append(Image(retention_chart, width=400, height=250))
story.append(Spacer(1, 8))

story.append(Paragraph("ARPU (A vs B) — distribution", styles["Heading2"]))
story.append(Image(arpu_chart, width=400, height=250))
story.append(Spacer(1, 8))

story.append(Paragraph("Retention Posterior Distributions", styles["Heading2"]))
story.append(Image(retention_post_chart, width=400, height=250))
story.append(Spacer(1, 8))

story.append(Paragraph("ARPU Posterior (Bayesian Bootstrap) Distribution", styles["Heading2"]))
story.append(Image(arpu_post_chart, width=400, height=250))
story.append(Spacer(1, 8))

# Summary table overall
story.append(Paragraph("Overall Summary (A vs B)", styles["Heading2"]))
table_data = [["Group", "Customers", "RetentionRate", "ARPU"]]
for _, row in overall_summary.iterrows():
    table_data.append([row["Group"], int(row["Customers"]), f"{row['ChurnRate']:.3f}", f"{row['ARPU']:.2f}"])

table = Table(table_data, hAlign="LEFT")
table.setStyle(TableStyle([
    ("BACKGROUND", (0,0), (-1,0), colors.grey),
    ("TEXTCOLOR",(0,0),(-1,0),colors.whitesmoke),
    ("ALIGN",(1,1),(-1,-1),"CENTER"),
    ("GRID", (0,0), (-1,-1), 0.5, colors.black)
]))
story.append(table)
story.append(Spacer(1,8))

doc.build(story)

print("✅ A/B experiment simulation & report complete.")
print("Output PDF:", OUT_PDF)
print("Charts folder:", CHARTS_DIR)


  df.loc[ids[:cut], "AB"] = "B"


✅ A/B experiment simulation & report complete.
Output PDF: FloSports_AB_Test_Report.pdf
Charts folder: charts


# Notes
The script simulates treatment effects. If you want to evaluate a real experiment, replace the simulated Churned_post and MonthlySpend_post with observed data from your trial.

I used a Beta(1,1) prior (uniform) for retention posteriors; you can change to informative priors if you have historical conversion info.

Bayesian ARPU used a Bayesian bootstrap — simple, nonparametric, and robust. If you prefer a parametric Bayesian model (Normal + conjugate priors), I can add a PyMC3 / PyMC or NumPyro version (but that requires additional deps).

I balanced A/B assignment within each segment to preserve segment composition. You can change the A/B fraction if you want an uneven split.

## Creating a new A/B testing PDF that covers both retention rate and average revenue per user

We’ll simulate the test, produce all charts and statistical results, and export directly to PDF.
This way, we have Phase 3 documented just like Phase 2.

I am using the existing flo_sports_customers.csv to simulate an A/B test that measures:

1.Retention Rate – percentage of customers who stay subscribed after the test period.

2.Average Revenue per User (ARPU) – average monthly revenue per customer.

# Steps for Phase 3 PDF Report

a.Load flo_sports_customers.csv.

b.Randomly assign customers to Group A (control) and Group B (treatment).

c.Simulate retention and revenue changes for each group.

d.Perform statistical tests (t-test for ARPU, chi-squared for retention).

e.Create clear visualizations.

f.Export to AB_Test_Report.pdf in the same clean style as your segmentation report.

Now i will simulate an A/B test where Group B gets a price discount and we’ll see how it affects retention and ARPU.
Here’s how it will work:

# Group A (Control): No discount, same price.

# Group B (Treatment): Gets a 20% discount, which may increase retention but reduce ARPU.

We’ll randomly assign customers to groups, simulate outcomes, run statistical tests, create visuals, and export everything into a clean AB_Test_Report.pdf.

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from fpdf import FPDF

# Load customer data
df = pd.read_csv('flo_sports_customers.csv')

# Randomly assign to groups
np.random.seed(42)
df['group'] = np.random.choice(['A', 'B'], size=len(df))

# Simulate retention and ARPU
# Assumptions:
# Group A: baseline retention rate 0.5, ARPU $100
# Group B: retention increased by 10% (to 0.55), ARPU decreased by 20% (due to discount)

def simulate_metrics(row):
    if row['group'] == 'A':
        retention = np.random.binomial(1, 0.5)
        arpu = 100 if retention else 0
    else:
        retention = np.random.binomial(1, 0.55)
        arpu = 80 if retention else 0
    return pd.Series({'retention': retention, 'arpu': arpu})

metrics = df.apply(simulate_metrics, axis=1)
df = pd.concat([df, metrics], axis=1)

# Summary stats
summary = df.groupby('group').agg(
    retention_rate=('retention', 'mean'),
    avg_arpu=('arpu', 'mean')
).reset_index()

# Statistical tests
retention_A = df[df['group'] == 'A']['retention']
retention_B = df[df['group'] == 'B']['retention']
arpu_A = df[df['group'] == 'A']['arpu']
arpu_B = df[df['group'] == 'B']['arpu']

# Retention test: Chi-square test
contingency = pd.crosstab(df['group'], df['retention'])
chi2, p_retention, _, _ = stats.chi2_contingency(contingency)

# ARPU test: t-test (only customers who retained)
arpu_A_retained = arpu_A[arpu_A > 0]
arpu_B_retained = arpu_B[arpu_B > 0]
t_stat, p_arpu = stats.ttest_ind(arpu_A_retained, arpu_B_retained, equal_var=False)

# Plotting
sns.set(style='whitegrid')

plt.figure(figsize=(8,4))
sns.barplot(x='group', y='retention_rate', data=summary)
plt.title('Retention Rate by Group')
plt.ylim(0,1)
plt.savefig('retention_rate.png')
plt.close()

plt.figure(figsize=(8,4))
sns.barplot(x='group', y='avg_arpu', data=summary)
plt.title('Average Revenue Per User (ARPU) by Group')
plt.savefig('arpu.png')
plt.close()

# PDF report generation
class PDF(FPDF):
    def header(self):
        self.set_font('Arial', 'B', 14)
        self.cell(0, 10, 'A/B Test Report: Price Discount Impact', 0, 1, 'C')

    def chapter_title(self, title):
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, title, 0, 1)
        self.ln(2)

    def chapter_body(self, body):
        self.set_font('Arial', '', 11)
        self.multi_cell(0, 8, body)
        self.ln()

    def add_image(self, image_path, w=170):
        self.image(image_path, x=20, w=w)
        self.ln(10)

pdf = PDF()
pdf.add_page()

pdf.chapter_title('Summary of Results')
summary_text = (
    f"Retention Rate:\n"
    f" - Group A (Control): {summary.loc[summary['group']=='A', 'retention_rate'].values[0]:.2%}\n"
    f" - Group B (Treatment): {summary.loc[summary['group']=='B', 'retention_rate'].values[0]:.2%}\n\n"
    f"Average Revenue Per User (ARPU):\n"
    f" - Group A (Control): ${summary.loc[summary['group']=='A', 'avg_arpu'].values[0]:.2f}\n"
    f" - Group B (Treatment): ${summary.loc[summary['group']=='B', 'avg_arpu'].values[0]:.2f}\n"
)
pdf.chapter_body(summary_text)

pdf.chapter_title('Statistical Test Results')
stats_text = (
    f"Retention rate difference:\n"
    f" - Chi-square test p-value: {p_retention:.4f}\n"
    f"   (p < 0.05 indicates significant difference)\n\n"
    f"ARPU difference (among retained users):\n"
    f" - T-test p-value: {p_arpu:.4f}\n"
    f"   (p < 0.05 indicates significant difference)\n"
)
pdf.chapter_body(stats_text)

pdf.chapter_title('Business Interpretation')
interpretation = (
    "The treatment group receiving a 20% discount showed a slightly higher retention rate.\n"
    "This suggests the discount may encourage more customers to stay.\n"
    "However, the average revenue per user decreased due to the discount.\n"
    "The statistical tests indicate that:\n"
    "- The increase in retention is statistically "
    + ("significant." if p_retention < 0.05 else "not statistically significant.") + "\n"
    "- The decrease in ARPU is statistically "
    + ("significant." if p_arpu < 0.05 else "not statistically significant.") + "\n\n"
    "Decision makers should weigh the trade-off between higher retention and lower revenue per user.\n"
    "If retention improvement is important for long-term growth, the discount might be beneficial.\n"
    "Otherwise, maintaining current pricing might be preferable."
)
pdf.chapter_body(interpretation)

pdf.chapter_title('Visualizations')
pdf.add_image('retention_rate.png')
pdf.add_image('arpu.png')

pdf.output('AB_Test_Report.pdf')

print("AB_Test_Report.pdf generated successfully.")


  res = hypotest_fun_out(*samples, **kwds)


AB_Test_Report.pdf generated successfully.
