In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind, norm

In [None]:
# Load the processed experiment results into the notebook-level frame `df`
df = pd.read_csv("../data/processed_experiment_results.csv")

# Overview of the data: column dtypes / non-null counts, then numeric summary
print("\n Info:")
# DataFrame.info() writes its report directly to stdout and returns None,
# so it is called bare; wrapping it in print() would append a stray "None".
df.info()

print("\n Summary Statistics:")
print(df.describe())

In [None]:
# Per-strategy summary table.
# Each entry maps an output column name to (source column, aggregation);
# "count" is the number of non-null session_id values in the group.
agg_spec = {
    "clicks_mean": ("clicks", "mean"),
    "clicks_median": ("clicks", "median"),
    "interest_mean": ("high_interest", "mean"),
    "interest_median": ("high_interest", "median"),
    "session_length_mean": ("session_length", "mean"),
    "session_length_median": ("session_length", "median"),
    "count": ("session_id", "count"),
}
summary_stats = df.groupby("strategy").agg(**agg_spec)

# Leave the table as the cell's last expression so the notebook renders it
summary_stats

In [None]:
# Box plot of the `clicks` column, one box per strategy
fig, ax = plt.subplots(figsize=(9, 3))
sns.boxplot(data=df, x="strategy", y="clicks", ax=ax)
ax.set_title("Clicks Distribution by Strategy")
plt.show()

In [None]:
# Box plot of the `high_interest` column, one box per strategy
fig, ax = plt.subplots(figsize=(9, 3))
sns.boxplot(data=df, x="strategy", y="high_interest", ax=ax)
ax.set_title("Interest Actions by Strategy")
plt.show()

In [None]:
# Box plot of the `session_length` column, one box per strategy
fig, ax = plt.subplots(figsize=(9, 3))
sns.boxplot(data=df, x="strategy", y="session_length", ax=ax)
ax.set_title("Session Length by Strategy")
plt.show()

## A/B Test (against strategy_0)

In [None]:
def run_z_test(group1, group2, metric, data=None):
    """Two-sample, two-tailed z-test on the mean of ``metric`` between two strategies.

    Parameters
    ----------
    group1, group2 : str
        Values of the ``strategy`` column that select the two groups.
    metric : str
        Name of the column whose group means are compared.
    data : pandas.DataFrame, optional
        Frame containing the ``strategy`` and ``metric`` columns. When omitted,
        the notebook-level ``df`` is used, so existing three-argument calls
        behave as before.

    Returns
    -------
    tuple
        ``(z_score, p_value)`` where ``z_score`` is
        ``(mean(group1) - mean(group2)) / standard_error`` and ``p_value`` is
        the two-tailed probability under the standard normal distribution.

    Notes
    -----
    Missing values are skipped by the pandas ``mean``/``std``/``count``
    reductions. A group with fewer than two non-null observations has no
    sample standard deviation, so the result is NaN in that case.
    """
    if data is None:
        data = df  # fall back to the notebook-level frame

    # Select each group's observations once instead of re-filtering per statistic
    sample1 = data.loc[data["strategy"] == group1, metric]
    sample2 = data.loc[data["strategy"] == group2, metric]

    x1, x2 = sample1.mean(), sample2.mean()
    s1, s2 = sample1.std(), sample2.std()
    n1, n2 = sample1.count(), sample2.count()

    # Standard error of the difference in means (unpooled variances)
    se = np.sqrt((s1**2 / n1) + (s2**2 / n2))
    z_score = (x1 - x2) / se

    # Use the survival function rather than 1 - cdf: for large |z| the cdf
    # rounds to exactly 1.0 and the subtraction collapses the p-value to 0.
    p_value = 2 * norm.sf(abs(z_score))  # Two-tailed test

    return z_score, p_value

def run_t_test(group1, group2, metric, data=None):
    """Welch's (unequal-variance) two-sample t-test on ``metric`` between two strategies.

    Parameters
    ----------
    group1, group2 : str
        Values of the ``strategy`` column that select the two groups.
    metric : str
        Name of the column whose group means are compared.
    data : pandas.DataFrame, optional
        Frame containing the ``strategy`` and ``metric`` columns. When omitted,
        the notebook-level ``df`` is used, so existing three-argument calls
        behave as before.

    Returns
    -------
    tuple
        ``(stat, p_value)`` as returned by ``scipy.stats.ttest_ind`` with
        ``equal_var=False``.
    """
    if data is None:
        data = df  # fall back to the notebook-level frame

    # Drop missing values before testing: ttest_ind propagates NaN by default,
    # whereas run_z_test's pandas reductions skip NaN. Dropping here keeps both
    # tests operating on the same set of observations.
    sample1 = data.loc[data["strategy"] == group1, metric].dropna()
    sample2 = data.loc[data["strategy"] == group2, metric].dropna()

    stat, p_value = ttest_ind(sample1, sample2, equal_var=False)
    return stat, p_value

In [None]:
# Metrics compared between the two strategies, and the strategy labels used
metrics = ["clicks", "high_interest", "session_length"]
control, treatment = "strategy_0", "strategy_A"

# First pass: t-test for every metric
for metric in metrics:
    stat, p = run_t_test(control, treatment, metric)
    print(f"T-test for {metric}: t-stat={stat}, p-value={p}")

# Second pass: z-test for every metric
for metric in metrics:
    z, p = run_z_test(control, treatment, metric)
    print(f"Z-test for {metric}: z-score={z}, p-value={p}")