### End-to-End A/B Test Project

##### import libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

##### reading the dataset

In [None]:
# Load the experiment data; the relative path is resolved against the
# current working directory.
df = pd.read_csv("ab_data.csv")
# Preview the first rows to confirm the file parsed as expected.
df.head()

##### Check structure

In [None]:
# (number of rows, number of columns) of the raw dataset.
df.shape

In [None]:
# Summary statistics for the numeric columns.
df.describe()

In [None]:
# Column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Number of missing values in each column.
df.isna().sum()

##### Handling Duplicates

In [None]:
# How many rows repeat a user_id that already appeared earlier in the frame.
df.duplicated(subset='user_id').sum()

In [None]:
# Keep only the first row seen for each user_id.
# NOTE(review): this runs before the group/page mismatch filter further down.
# If a user's first row is a mismatched one, their later (possibly valid) row
# is discarded here and the mismatch filter then removes the user entirely.
# Confirm this ordering is intended.
repeated_user = df['user_id'].duplicated(keep='first')
df = df[~repeated_user]

In [None]:
# Row/column count after removing repeated user_id rows.
df.shape

##### Check group–page mismatch

In [None]:
# A row is inconsistent when the control group was served the new page or
# the treatment group was served the old page.
in_control = df['group'] == 'control'
in_treatment = df['group'] == 'treatment'
saw_new_page = df['landing_page'] == 'new_page'
saw_old_page = df['landing_page'] == 'old_page'

mismatch = df[(in_control & saw_new_page) | (in_treatment & saw_old_page)]
mismatch.shape

In [None]:
# Remove the inconsistent rows by their index labels.
df = df.drop(index=mismatch.index)

In [None]:
# Re-run the same consistency filter; an empty result (0 rows) confirms that
# no group/page mismatches remain.
control_on_new = (df['group'] == 'control') & (df['landing_page'] == 'new_page')
treatment_on_old = (df['group'] == 'treatment') & (df['landing_page'] == 'old_page')

mismatch_check = df[control_on_new | treatment_on_old]
mismatch_check.shape

##### Extra Validation

In [None]:
# Contingency table of group vs. landing page: only the control/old_page and
# treatment/new_page cells should be non-zero after cleaning.
pd.crosstab(index=df['group'], columns=df['landing_page'])

#### Basic EDA

In [None]:
# Number of rows in each experiment arm.
df['group'].value_counts()

In [None]:
# Mean of the `converted` column across both groups combined.
df['converted'].mean()

In [None]:
# Per-group row count and mean of `converted`.
converted_by_group = df.groupby('group')['converted']
converted_by_group.agg(['count', 'mean'])

In [None]:
# seaborn's barplot aggregates `converted` per group (mean by default), so
# each bar shows that group's conversion rate.
ax = sns.barplot(x='group', y='converted', data=df)
ax.set_title("Conversion Rate: Control vs Treatment")
plt.show()

#### calculate lift

In [None]:
# Compute the per-group conversion rates from the cleaned data instead of
# pasting numbers from an earlier cell's output, so the lift stays correct
# whenever the data or the cleaning steps change.
group_rates = df.groupby('group')['converted'].mean()
control_rate = group_rates['control']
treatment_rate = group_rates['treatment']

# Relative lift of treatment over control, expressed in percent.
lift = (treatment_rate - control_rate) / control_rate * 100
lift

#### Hypothesis Testing

##### z-proportion test

In [None]:
from scipy import stats
import numpy as np

# Two-sided, two-proportion z-test of H0: p_treatment == p_control.

# Conversion outcomes for each experiment arm.
control = df.loc[df['group'] == 'control', 'converted']
treatment = df.loc[df['group'] == 'treatment', 'converted']

n_control = len(control)
n_treatment = len(treatment)

print("Control:", n_control, "Treatment:", n_treatment)  # sanity check

# An empty arm would otherwise surface as an opaque ZeroDivisionError below.
if n_control == 0 or n_treatment == 0:
    raise ValueError("Both control and treatment groups must contain at least one row")

p_control = control.mean()
p_treatment = treatment.mean()

# Pooled conversion probability: under H0 both arms share one rate, so the
# best estimate combines all conversions over all users.
p_pool = (control.sum() + treatment.sum()) / (n_control + n_treatment)

# Standard error of the difference in proportions under H0.
se = np.sqrt(p_pool * (1 - p_pool) * (1 / n_control + 1 / n_treatment))

# z-score (treatment - control)
z_score = (p_treatment - p_control) / se

# Two-tailed p-value. The survival function avoids the precision loss of
# computing 1 - cdf(|z|) when |z| is large and cdf is very close to 1.
p_value = 2 * stats.norm.sf(abs(z_score))

print("Z-score:", z_score)
print("P-value:", p_value)

#### confidence interval

In [None]:
# 95% Wald confidence interval for (p_treatment - p_control).
diff = p_treatment - p_control

# A confidence interval does not assume H0, so use the unpooled standard
# error (each arm's own variance) rather than the pooled `se` computed for
# the hypothesis test.
se_diff = np.sqrt(
    p_control * (1 - p_control) / n_control
    + p_treatment * (1 - p_treatment) / n_treatment
)

# Two-sided 95% critical value of the standard normal distribution.
z_crit = 1.96

ci_low = diff - z_crit * se_diff
ci_high = diff + z_crit * se_diff

print("95% Confidence Interval:", ci_low, ci_high)

##### Conclusion: The new landing page showed a 1.3% relative decrease in conversion rate compared to the existing page (about 11.87% vs. 12.03%). However, the difference was not statistically significant (p = 0.196). The 95% confidence interval for the difference in conversion rates (-0.39 to 0.08 percentage points) includes zero, indicating that the observed difference may be due to random variation. Based on this analysis, there is insufficient evidence to recommend deploying the new landing page. It is advised to retain the current design or continue testing with additional data.