In [1]:
import pandas as pd
from statsmodels.stats.proportion import proportions_ztest

In [2]:
# Load the A/B test data from a CSV in the notebook's working directory.
df = pd.read_csv('ab_test.csv')

In [3]:
# Preview the first five rows (5 is the default row count for head()).
df.head(n=5)

Unnamed: 0,id,time,con_treat,page,converted
0,851104,11:48.6,control,old_page,0
1,804228,01:45.2,control,old_page,0
2,661590,55:06.2,treatment,new_page,0
3,853541,28:03.1,treatment,new_page,0
4,864975,52:26.2,control,old_page,1


In [4]:
# Summarise column dtypes, non-null counts and memory usage of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 294478 entries, 0 to 294477
Data columns (total 5 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   id         294478 non-null  int64 
 1   time       294478 non-null  object
 2   con_treat  294478 non-null  object
 3   page       294478 non-null  object
 4   converted  294478 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 11.2+ MB


In [5]:
# Count rows for every (group, page) combination, most frequent first.
# Any combination other than control/old_page or treatment/new_page is a
# row where the assigned group and the page served disagree.
df.value_counts(subset=['con_treat', 'page'])

con_treat  page    
treatment  new_page    145311
control    old_page    145274
treatment  old_page      1965
control    new_page      1928
Name: count, dtype: int64

In [6]:
# Rows where the assigned group and the page actually served disagree:
# control users are expected on old_page and treatment users on new_page.
# The page condition must be checked against the 'page' column; comparing
# 'con_treat' to a page name can never be true and always yields an empty frame.
mismatches = df[
    ((df['con_treat'] == 'control') & (df['page'] == 'new_page')) |
    ((df['con_treat'] == 'treatment') & (df['page'] == 'old_page'))
]


In [7]:
# Per-column count of missing values among the mismatched rows.
mismatches.isna().sum()

id           0
time         0
con_treat    0
page         0
converted    0
dtype: int64

In [8]:
# Conversion rate per group = mean of the 0/1 'converted' flag.
# NOTE(review): this uses every row of df, including rows whose page does not
# match their assigned group; confirm whether those should be excluded first.
is_control = df['con_treat'] == 'control'
is_treatment = df['con_treat'] == 'treatment'

control_rate = df.loc[is_control, 'converted'].mean()
treatment_rate = df.loc[is_treatment, 'converted'].mean()

In [9]:
# Report the raw conversion rates (fractions, not percentages).
print('Control Converted Rate', control_rate)
print('Treatment Converted Rate', treatment_rate)

Control Converted Rate 0.12039917935897611
Treatment Converted Rate 0.11891957956489856


In [10]:
# Inputs for the two-proportion z-test, ordered [control, treatment].
# Select each group's 'converted' column once, then derive both lists from it.
converted_by_group = [
    df.loc[df['con_treat'] == group, 'converted']
    for group in ('control', 'treatment')
]

# Number of converted users per group (sum of the 0/1 flag).
conversions = [flags.sum() for flags in converted_by_group]

# Number of observations per group (non-null 'converted' values).
total_users = [flags.count() for flags in converted_by_group]

In [11]:
# Converted-user counts, ordered [control, treatment].
print('conversions', conversions)

conversions [17723, 17514]


In [12]:
# Group sizes, ordered [control, treatment].
print('total_users', total_users)

total_users [147202, 147276]


In [13]:
# Two-sample z-test for equality of the control and treatment conversion
# proportions. 'two-sided' is the function's default alternative; it is
# spelled out so the hypothesis being tested is visible in the notebook.
z_stat, p_value = proportions_ztest(
    count=conversions,
    nobs=total_users,
    alternative='two-sided',
)

In [14]:
# Report the test statistic and its two-sided p-value.
print('Z Statistics', z_stat)
print('P values', p_value)

Z Statistics 1.2369217547321678
P values 0.21611613269757501


In [None]:
###  A/B Testing Conclusion 
We conducted an A/B test to evaluate the effectiveness of a new landing page (treatment) compared to the existing one (control) in terms of conversion rate.

###  Key Findings:
- **Control Group Conversion Rate:** 12.04%
- **Treatment Group Conversion Rate:** 11.89%
- **Z-Statistic:** 1.24
- **P-Value:** 0.2161

###  Statistical Inference:
Using a two-proportion z-test, we found that the difference in conversion rates between the control and treatment groups is **not statistically significant** at the 5% significance level (p > 0.05).

###  Business Implication:
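Since the new landing page does not significantly improve conversions (its observed conversion rate was in fact slightly lower than the control's), it may not be worth implementing the change site-wide. Note that this analysis still includes the roughly 3,900 rows in which the assigned group and the page shown do not match; these should be removed and the test re-run before a final decision. Further testing with a larger sample size or a different design may be recommended.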
