# THURSDAY 16TH OCTOBER, 2025

## AB TESTING

In [48]:
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import numpy as np

In [49]:
home_page = pd.read_csv(r"homepage_actions.csv")
home_page.head()

Unnamed: 0,timestamp,id,group,action
0,2016-09-24 17:42:27.839496,804196,experiment,view
1,2016-09-24 19:19:03.542569,434745,experiment,view
2,2016-09-24 19:36:00.944135,507599,experiment,view
3,2016-09-24 19:59:02.646620,671993,control,view
4,2016-09-24 20:26:14.466886,536734,experiment,view


#### EXPERIMENTAL GROUP
- FILTERING EXPERIMENTS IN COLUMN GROUP

In [50]:
experimental_group = home_page[home_page["group"]=="experiment"]
experimental_group.head()

Unnamed: 0,timestamp,id,group,action
0,2016-09-24 17:42:27.839496,804196,experiment,view
1,2016-09-24 19:19:03.542569,434745,experiment,view
2,2016-09-24 19:36:00.944135,507599,experiment,view
4,2016-09-24 20:26:14.466886,536734,experiment,view
5,2016-09-24 20:32:25.712659,681598,experiment,view


In [51]:
click_action_experimental = experimental_group["action"].value_counts()
click_action_experimental

action
view     2996
click     928
Name: count, dtype: int64

##### CONTROL GROUP
- Filtering controls in column group

In [52]:
control_group = home_page[home_page["group"]=="control"]
control_group

Unnamed: 0,timestamp,id,group,action
3,2016-09-24 19:59:02.646620,671993,control,view
9,2016-09-24 21:00:12.278374,560027,control,view
23,2016-09-25 00:25:14.141290,281985,control,view
24,2016-09-25 01:14:48.603202,407864,control,view
25,2016-09-25 02:16:11.046654,342984,control,view
...,...,...,...,...
8178,2017-01-18 08:17:12.675797,616692,control,view
8181,2017-01-18 09:07:37.661143,795585,control,view
8182,2017-01-18 09:09:17.363917,795585,control,click
8186,2017-01-18 10:08:51.588469,505451,control,view


In [53]:
click_action_control = control_group["action"].value_counts()
click_action_control

action
view     3332
click     932
Name: count, dtype: int64

#### SUMS OF EXPERIMENTAL AND CONTROL VIEWS 
- counting the  sum of experimental views and control views

In [54]:
experimental_views = (experimental_group["action"]=="view").sum()
experimental_views

np.int64(2996)

In [55]:
control_views = (control_group["action"]=="view").sum()
control_views

np.int64(3332)

#### COUNTING  SUM OF CONTROL CLICKS AND EXPERIMENTAL CLICKS

In [56]:
control_clicks = (control_group["action"]=="click").sum()
control_clicks

np.int64(932)

In [57]:
experimental_clicks = (experimental_group["action"]=="click").sum()
experimental_clicks

np.int64(928)

#### GETTING CONVERSION TRANSFER RATE
- Dividing clicks by views

In [58]:
experimental_ctr = (experimental_clicks/experimental_views)
experimental_ctr

np.float64(0.3097463284379172)

In [59]:
control_ctr = (control_clicks/control_views)
control_ctr

np.float64(0.2797118847539016)

#### PROBABILITY 
- sum of clicks divided by sum of views

In [70]:
probability = (experimental_clicks+control_clicks)/(experimental_views+control_views)
probability

np.float64(0.2939317319848293)

In [71]:
experimental_group.head()

Unnamed: 0,timestamp,id,group,action
0,2016-09-24 17:42:27.839496,804196,experiment,view
1,2016-09-24 19:19:03.542569,434745,experiment,view
2,2016-09-24 19:36:00.944135,507599,experiment,view
4,2016-09-24 20:26:14.466886,536734,experiment,view
5,2016-09-24 20:32:25.712659,681598,experiment,view


In [61]:
e_sum = experimental_group["action"].count()
e_sum

np.int64(3924)

#### GETTING THE MEANS OF CONTROL AND EXPERIMENTAL
- views multiplied by ctr

In [62]:
control_mean = (control_views*control_ctr)
control_mean

np.float64(932.0)

In [63]:
experimental_mean = (experimental_views*experimental_ctr)
experimental_mean

np.float64(928.0)

#### PULLED STD
- sqrt((p*(1-p)) * ((1/control_views)+(1/experimental_views)))

In [80]:
pulled_std = np.sqrt((probability*(1-probability))*((1/control_views)+(1/experimental_views)))
pulled_std

np.float64(0.011469815134950316)

#### Z STATISTIC
- (experimental_ctr - control_ctr)/(pulled_std)

In [81]:
z_statistic = (experimental_ctr - control_ctr)/(pulled_std)
z_statistic

np.float64(2.618563885349469)

In [78]:
cdf = stats.norm.cdf(z_statistic)
cdf

np.float64(0.9955849622117021)

#### P VALUE

In [79]:
p_value = 1- cdf
p_value

np.float64(0.004415037788297882)

##### we reject the null hypotesis and conclude that there is a difference
- NULL hypothesis there is no difference control and experimental
- alternative there is a difference