In [None]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind, ttest_rel, sem, t

In [None]:
# Define function to calculate confidence interval
def confidence_interval(data, confidence=0.95):
    """Return a two-sided Student-t confidence interval for the mean of ``data``.

    Parameters
    ----------
    data : array-like of numbers
        One-dimensional sample (list, NumPy array or pandas Series).
        NaN entries are ignored, so the mean, the standard error and the
        degrees of freedom are all computed from the same observations.
    confidence : float, default 0.95
        Confidence level of the interval.

    Returns
    -------
    tuple of (float, float)
        ``(lower, upper)`` bounds around the sample mean. Both bounds are NaN
        when fewer than two non-NaN observations are available, because the
        standard error of the mean is undefined in that case.
    """
    values = np.asarray(data, dtype=float)
    # Boolean masking drops missing observations (and yields a 1-D array).
    values = values[~np.isnan(values)]

    n_obs = values.size
    if n_obs < 2:
        # sem() and t.ppf(df=0) would only produce NaN plus runtime warnings.
        return float("nan"), float("nan")

    mean = values.mean()
    margin = sem(values) * t.ppf((1 + confidence) / 2., n_obs - 1)
    # Plain floats keep printed tuples readable regardless of NumPy's scalar repr.
    return float(mean - margin), float(mean + margin)

In [None]:
# Accumulator for the per-metric result rows gathered during the analysis
results_summary = list()

In [None]:
# Run t-test for selected metrics between variations and compare geographies
def compare_metric(df, metric_name, variation_col="variation", geo_col="geo"):
    """Compare one metric between variations "A" and "B" and print the findings.

    Two tests are run:

    1. Welch's (unequal-variance) t-test on all "A" rows versus all "B" rows,
       together with a 95% confidence interval for each group mean.
    2. A paired t-test across geographies, restricted to geographies that have
       a value for both "A" and "B". Geographies missing either one are
       listed as skipped.

    The overall comparison (not the geography-level test) is appended as a
    dict to the module-level ``results_summary`` list.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``metric_name``, ``variation_col`` and
        ``geo_col``.
    metric_name : str
        Column holding the metric to compare.
    variation_col : str, default "variation"
        Column whose values "A" / "B" identify the two groups.
    geo_col : str, default "geo"
        Column identifying the geography of each row.

    Raises
    ------
    ValueError
        Raised by ``DataFrame.pivot`` when a (geography, variation) pair
        occurs more than once in ``df``.
    """
    print(f"\n Analyzing metric: **{metric_name}**\n" + "-"*50)

    # Drop missing observations up front: ttest_ind would otherwise return NaN
    # while Series.mean() silently skips them, making the summary inconsistent.
    A = df.loc[df[variation_col] == "A", metric_name].dropna()
    B = df.loc[df[variation_col] == "B", metric_name].dropna()

    # Welch's t-test
    t_stat, p_val = ttest_ind(A, B, equal_var=False)
    ci_A = confidence_interval(A)
    ci_B = confidence_interval(B)

    print(f"Welch’s t-test: t = {t_stat:.4f} | p = {p_val:.4f}")
    print(f"A 95% CI: {ci_A}")
    print(f"B 95% CI: {ci_B}")

    if np.isnan(p_val):
        # A NaN p-value means the test could not be computed; it must not be
        # reported as evidence of "no difference".
        print("Not enough data to test the difference between A and B")
    elif p_val < 0.05:
        print("Statistically significant difference between A and B")
    else:
        print("No statistically significant difference between A and B")

    # Append to result list
    results_summary.append({
        "Metric": metric_name,
        "t-statistic": round(t_stat, 4),
        "p-value": round(p_val, 4),
        "A mean": round(A.mean(), 4),
        "B mean": round(B.mean(), 4),
        "A 95% CI low": round(ci_A[0], 4),
        "A 95% CI high": round(ci_A[1], 4),
        "B 95% CI low": round(ci_B[0], 4),
        "B 95% CI high": round(ci_B[1], 4)})

    # Geography analysis

    # reindex() keeps only the two compared variations and guarantees both
    # columns exist (all-NaN when a variation is absent), so dropna() keeps
    # exactly the geographies that have both an A and a B value.
    pivot = (
        df.pivot(index=geo_col, columns=variation_col, values=metric_name)
        .reindex(columns=["A", "B"])
        .dropna()
    )

    all_geos = df[geo_col].unique().tolist()
    paired_geos = pivot.index.tolist()
    skipped = sorted(set(all_geos) - set(paired_geos))

    if len(pivot) >= 2:
        _, geo_p = ttest_rel(pivot["A"], pivot["B"])
        print(f"\n Geography-level paired t-test: p = {geo_p:.4f}")
        if geo_p < 0.05:
            print("Statistically significant difference across geographies")
        else:
            print("No significant difference across geographies")
    elif pivot.empty:
        print("\n No geographies had both A and B for this metric.")
    else:
        # A single pair has zero degrees of freedom, so the test is undefined.
        print("\n Only one geography had both A and B; a paired t-test needs at least two.")

    if skipped:
        print(f"\n Skipped geographies (missing A or B): {skipped}")

In [None]:
# Load data and run analysis
df = pd.read_csv("BigQuery_output.csv")

metrics_to_test = ["Click-through rate", "Conversion rate proxy", "Profit per click", "Profit per money spend"]

# Fail before any analysis runs if the export lacks a column the loop needs,
# instead of stopping half-way with partially collected results.
missing_columns = [col for col in ["variation", "geo", *metrics_to_test] if col not in df.columns]
if missing_columns:
    raise KeyError(f"BigQuery_output.csv is missing expected columns: {missing_columns}")

# compare_metric appends to results_summary; reset it so that re-running this
# cell does not duplicate rows in the saved summary.
results_summary.clear()
for metric in metrics_to_test:
    compare_metric(df, metric)

In [None]:
# Persist the collected summary rows as a CSV file (row index omitted)
results_df = pd.DataFrame(data=results_summary)
results_df.to_csv(
    path_or_buf="test_summary_output.csv",
    index=False,
)
print("\n Test results saved to 'test_summary_output.csv'")