In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/grocery-website-data-for-ab-test/grocerywebsiteabtestdata.csv


## A\B Test For Grocery-Website
1. ### [Investigating](#Investigating)
1. ### [Checking For Normality](#Normality)
1. ### [Statistical Testing for click rates for two independent samples](#Testing)
1. ### [Power Analysis](#Power)
1. ### [Conclusion](#Conclusion)

In [2]:
#Importing Libraries 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import statsmodels.api as sm
from scipy import stats





<a id='Investigating'></a>
# Investigating Data

In [3]:
#Loading data
df=pd.read_csv("/kaggle/input/grocery-website-data-for-ab-test/grocerywebsiteabtestdata.csv")

In [4]:
df.head()

Unnamed: 0,RecordID,IP Address,LoggedInFlag,ServerID,VisitPageFlag
0,1,39.13.114.2,1,2,0
1,2,13.3.25.8,1,1,0
2,3,247.8.211.8,1,1,0
3,4,124.8.220.3,0,3,0
4,5,60.10.192.7,0,2,0


In [5]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
RecordID,184588.0,92294.5,53286.110082,1.0,46147.75,92294.5,138441.25,184588.0
LoggedInFlag,184588.0,0.503657,0.499988,0.0,0.0,1.0,1.0,1.0
ServerID,184588.0,1.999691,0.816719,1.0,1.0,2.0,3.0,3.0
VisitPageFlag,184588.0,0.058184,0.234091,0.0,0.0,0.0,0.0,1.0


In [6]:
#Check for duplicates (users that logged)
df['IP Address'].duplicated().sum()

85072

#### For IP Addresses that visited the website more than once will be reduced to only one to not effecting our results.

In [7]:
df=df.groupby(["IP Address","LoggedInFlag","ServerID"])['VisitPageFlag'].sum()

In [8]:
df = df.reset_index(name="VisitPageFlagSum")
df.head()

Unnamed: 0,IP Address,LoggedInFlag,ServerID,VisitPageFlagSum
0,0.0.108.2,0,1,0
1,0.0.109.6,1,1,0
2,0.0.111.8,0,3,0
3,0.0.160.9,1,2,0
4,0.0.163.1,0,2,0


In [9]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
LoggedInFlag,99763.0,0.503694,0.499989,0.0,0.0,1.0,1.0,1.0
ServerID,99763.0,1.999729,0.816932,1.0,1.0,2.0,3.0,3.0
VisitPageFlagSum,99763.0,0.107655,0.334371,0.0,0.0,0.0,0.0,4.0


In [10]:

df['Visit_Page_Flag']=df.VisitPageFlagSum.apply(lambda x: 1 if x!=0 else 0 )

In [11]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
LoggedInFlag,99763.0,0.503694,0.499989,0.0,0.0,1.0,1.0,1.0
ServerID,99763.0,1.999729,0.816932,1.0,1.0,2.0,3.0,3.0
VisitPageFlagSum,99763.0,0.107655,0.334371,0.0,0.0,0.0,0.0,4.0
Visit_Page_Flag,99763.0,0.100017,0.300024,0.0,0.0,0.0,0.0,1.0


In [12]:
df.head()

Unnamed: 0,IP Address,LoggedInFlag,ServerID,VisitPageFlagSum,Visit_Page_Flag
0,0.0.108.2,0,1,0,0
1,0.0.109.6,1,1,0,0
2,0.0.111.8,0,3,0,0
3,0.0.160.9,1,2,0,0
4,0.0.163.1,0,2,0,0


In [13]:
df['group']=df.ServerID.map({1:'treatment',2:'control',3:'control'})
df.drop(['ServerID','VisitPageFlagSum'],axis=1,inplace=True)
df.head()

Unnamed: 0,IP Address,LoggedInFlag,Visit_Page_Flag,group
0,0.0.108.2,0,0,treatment
1,0.0.109.6,1,0,treatment
2,0.0.111.8,0,0,control
3,0.0.160.9,1,0,control
4,0.0.163.1,0,0,control


In [14]:
# Calcualte Important metrics
n_t=df.query('group=="treatment"')['IP Address'].count()#number of people in treatment group
n_c=df.query('group=="control"')['IP Address'].count()#number of people in control group
converted_t=df.query('group=="treatment"&Visit_Page_Flag==1')['IP Address'].count()#number of clicks in treatment group
converted_c=df.query('group=="control"&Visit_Page_Flag==1')['IP Address'].count()#number of clicks in control group
t_r=converted_t/n_t#click ratio for treatment
c_r=converted_c/n_c#click ratio for control
print(f"""number of people in treatment group={n_t}
number of people in control group={n_c}
number of clicks in treatment group={converted_t}
number of clicks in control group={converted_c}
click ratio for treatment={t_r}
click ratio for control={c_r}
""")

number of people in treatment group=33303
number of people in control group=66460
number of clicks in treatment group=3847
number of clicks in control group=6131
click ratio for treatment=0.11551511875806984
click ratio for control=0.09225097803189888



<a id='Normality'></a>
# Checking For Normality 

#### In order to choose the appropriate statistical test, it is necessary to first assess the normality of the data.
* **Parametric tests** are based on the assumption that the data is normally distributed. This means that the data should be symmetrical and bell-shaped. If the data is not normally distributed, then the results of the parametric test may not be accurate. **(two_sample_z_test or two_sample_t_test)**

* **Nonparametric tests** do not make any assumptions about the distribution of the data. This means that they can be used even if the data is not normally distributed. However, nonparametric tests are not as powerful as parametric tests. **(Mann-Whitney U test)**

##### The choice of whether to use a parametric or nonparametric test will depend on the specific situation. If the data is normally distributed, then a parametric test can be used. However, if the data is not normally distributed, then a nonparametric test should be used.

### H_0 : The data is normally distributed.
### H_1 : The data is not normally distributed.

In [15]:
control_group=df.query('group=="control"')['Visit_Page_Flag']
treatment_group=df.query('group=="treatment"')['Visit_Page_Flag']

In [16]:
#checking for normality for treatment group
statistic_treatment,pvalue_treatmen=stats.shapiro(treatment_group)

print(f"""statistic_treatment=={statistic_treatment:.4f}
pvalue_treatmen={pvalue_treatmen:.4f}""")

statistic_treatment==0.3711
pvalue_treatmen=0.0000




In [17]:
#checking for normality for control group
statistic_control,pvalue_control=stats.shapiro(control_group)

print(f"""statistic_control=={statistic_control:.4f}
\npvalue_control={pvalue_control:.4f}""")

statistic_control==0.3266

pvalue_control=0.0000


In [18]:
if pvalue_treatmen or pvalue_control <0.05 :
    print("Data is not normally distributed")
else :
    print("Data is normally distributed")

Data is not normally distributed


#### As data is not normally distributed a -non parametric test- will be used.

<a id='Testing'></a>
# Statistical Testing for click rates for two independent samples.

## Mann-Whitney U test

### H_0 : There is no difference in click rates between two groups.
### H_1 : There is difference in click rates between two groups.

In [19]:
statistic,p_value=stats.mannwhitneyu(treatment_group,control_group)
print(f"""statistic=={statistic}
p_value=={p_value}""")

if p_value<0.05:
    print("\nWe can reject the null hypothesis.")
else :
    print('\nwe can not reject null hypothesis')
    

statistic==1132404153.5
p_value==7.41454867554153e-31

We can reject the null hypothesis.


<a id='Power'></a>
# Power Analysis

* #### check sample size to achieve 95 % power (The power is the probability of correctly rejecting the null hypothesis when it is false. The default power is 0.80, which means that you are 80% confident that you will correctly reject the null hypothesis when it is false.)

In [20]:
#Assuming a medium effect size (0.5 due to cohen's d measure)
control_sample_size=sm.stats.tt_ind_solve_power(effect_size=0.5,nobs1=None,alpha=0.05,power=0.95,ratio=n_t/n_c,alternative="two-sided")
if n_c>control_sample_size :
    print('We have enough sample size to achieve 95 % power with a medium effect size')
else :
    print ('We donot have enough sample size to achieve 95 % power with a medium effect size')

We have enough sample size to achieve 95 % power with a medium effect size


* #### Calculate minimum detectable effect size

In [21]:
min_effect_size=sm.stats.tt_ind_solve_power(effect_size=None,nobs1=n_c,alpha=0.05,power=0.95,ratio=n_t/n_c,alternative="two-sided")
print (f"Minimum detectable Effect Size \t {min_effect_size}")

Minimum detectable Effect Size 	 0.02420017575614079


* #### Calculating Effect Size Using Cohen's d (Cohen’s d measures the size of the difference between two groups )

In [22]:
# Cohen's d effect size
#sm.stats.proportion_effectsize(t_r,c_r)
# Using Pooled Standard error, with a difference of only 0.03. 
#This suggests that the two groups may have similar standard deviations. 
#Therefore, the pooled standard deviation is probably a good choice.
pooled_SD = (n_t * treatment_group.std() + n_c * control_group.std()) / (n_t + n_c)
effect_size=(t_r-c_r)/pooled_SD
if effect_size <= 0.2 :
    print(f'Effect_Size=={effect_size}\t Small Effect Size')
elif 0.2<effect_size <= 0.5 :
    print(f'Effect_Size=={effect_size}\t Medium Effect Size')
else :
    print(f'Effect_Size=={effect_size}\t Large Effect Size')

Effect_Size==0.07768041370001634	 Small Effect Size


* #### Calculating Power 


In [23]:
#Calculating Power using real Effect Size
power=sm.stats.tt_ind_solve_power(effect_size=effect_size,nobs1=n_c,alpha=0.05,power=None,ratio=n_t/n_c,alternative="two-sided")
print(f"Power \t {power}")

Power 	 1.0


<a id='Conclusion'></a>
# Conclusion

* ### We have enough sample size to achieve 95% power with medium effect size.
* ### Effect Size larger than minimum detectable effect size.
* ### We can reject the null hypothesis that there is no significant difference in click rate between the two groups. The results of the Mann-Whitney U test show that there is a significant statistical difference in click rates between the two groups, with the treatment group having a higher click rate than the control group.the control group.
* ### In terms of Cohen's d, an effect size of 0.0777 is considered to be a small effect size. Although this difference is statistically significant, the practical significance is considered to be very small.