In [1]:
import pandas as pd
from scipy.stats import chi2_contingency

In [2]:
# Read the raw A/B event log; the path is resolved relative to the current working directory.
events_path = '../data/events_ab.csv'
events = pd.read_csv(events_path)

In [3]:
# Per-user event counts: one row per user_id, one column per event_name.
# Each cell holds the number of non-null event_time values for that user/event,
# with 0 filled in where the user never triggered the event.
# Note: this only builds the counts; to see whether any user purchased more than
# once, inspect the 'purchase' column (e.g. its maximum) afterwards.
pivot = (
    events
    .pivot_table(index='user_id', columns='event_name', values='event_time', aggfunc='count')
    .fillna(0)
    .reset_index()
)

In [4]:
# Attach each user's A/B group to their per-event counts.
# drop_duplicates() keeps one row per distinct (user_id, ab_group) pair, so a user
# logged under both groups would still appear twice in ab_groups.
ab_groups = events[['user_id', 'ab_group']].drop_duplicates()

# validate='one_to_one' makes pandas raise MergeError when a user_id is repeated on
# either side, instead of silently duplicating that user's row into both groups
# (which would double-count them in the conversion rates and the chi-square test).
df = pivot.merge(ab_groups, on='user_id', validate='one_to_one')

In [5]:
# Conversion per group.
# df['purchase'] is a per-user count of purchase events, so summing it would count
# repeat purchases and overstate the conversion rate. Count converting users
# (purchase > 0) instead, which is also the quantity the chi-square test compares.
converted = df['purchase'] > 0

# 'purchases' = number of users with at least one purchase, 'total' = users in the group.
# Column names are kept as before so any later reference to them still works.
summary = (
    converted
    .groupby(df['ab_group'])
    .agg(['sum', 'count'])
    .rename(columns={'sum': 'purchases', 'count': 'total'})
)
summary['conversion_rate'] = summary['purchases'] / summary['total']
print(summary)

          purchases  total  conversion_rate
ab_group                                   
A              46.0    490         0.093878
B              80.0    510         0.156863


In [6]:
# Chi-square test of independence between A/B group and conversion.
# Rows of the contingency table are the A/B groups; columns are whether the user
# has at least one purchase (False / True).
# NOTE(review): chi2_contingency is called with its defaults; per the SciPy docs the
# default applies Yates' continuity correction on 2x2 tables - confirm that is intended.
has_purchase = df['purchase'] > 0
table = pd.crosstab(df['ab_group'], has_purchase)
chi2, p, dof, expected = chi2_contingency(table)

In [7]:
# Report the p-value and compare it against the 5% significance level.
alpha = 0.05
verdict = (
    "A/B test result is statistically significant."
    if p < alpha
    else "A/B test result is not significant."
)
print(f"\nChi-square p-value: {p:.4f}")
print(verdict)


Chi-square p-value: 0.0037
A/B test result is statistically significant.
