In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys, os
import warnings
import scipy
import scipy.stats as scs

sys.path.append("../scripts/")
from clean_data import  DataCleaner
from utils import  Utils
from PlottingFunctions import  PlottingFunctions
from ABTestingFunctions import  ABTesting
warnings.filterwarnings("ignore")

In [3]:
# One instance of each helper class imported from ../scripts/.
cleaner = DataCleaner()
utils = Utils()
plotter = PlottingFunctions()
ab_testing = ABTesting()

In [4]:
# Load the raw A/B-test CSV through the project's Utils helper.
df = utils.load_data("../data/AdSmartABdata.csv")
# NOTE(review): drop_unresponsive presumably removes rows where the user answered
# neither "yes" nor "no" -- confirm against scripts/clean_data.py.
responsive_df = cleaner.drop_unresponsive(df)
# Show (rows, columns) of the filtered frame.
responsive_df.shape

(1243, 9)

In [5]:
# Print column names, non-null counts and dtypes of the filtered frame.
responsive_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1243 entries, 2 to 8071
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   auction_id   1243 non-null   object
 1   experiment   1243 non-null   object
 2   date         1243 non-null   object
 3   hour         1243 non-null   int64 
 4   device_make  1243 non-null   object
 5   platform_os  1243 non-null   int64 
 6   browser      1243 non-null   object
 7   yes          1243 non-null   int64 
 8   no           1243 non-null   int64 
dtypes: int64(4), object(5)
memory usage: 97.1+ KB


In [7]:
# Keep only the columns needed for the A/B comparison and give the response
# flag a descriptive name. rename() without inplace returns a new frame, so
# this never writes into a slice of responsive_df (no SettingWithCopyWarning)
# and the cell can be re-run safely.
cleaned_df = (
    responsive_df[['auction_id', 'experiment', 'yes']]
    .rename(columns={'yes': 'know_brand'})
)
cleaned_df.head()

Unnamed: 0,auction_id,experiment,know_brand
2,0016d14a-ae18-4a02-a204-6ba53b52f2ed,exposed,0
16,008aafdf-deef-4482-8fec-d98e3da054da,exposed,1
20,00a1384a-5118-4d1b-925b-6cdada50318d,exposed,0
23,00b6fadb-10bd-49e3-a778-290da82f7a8d,control,1
27,00ebf4a8-060f-4b99-93ac-c62724399483,control,0


Data Summary

In [21]:
# Mean of know_brand per experiment group (pivot_table's default aggfunc is
# the mean). values= is given explicitly so the string auction_id column is
# never averaged: older pandas dropped it with a deprecation warning, and
# pandas >= 2.0 raises instead.
summary_df = cleaned_df.pivot_table(index='experiment', values='know_brand')
summary_df

Unnamed: 0_level_0,know_brand
experiment,Unnamed: 1_level_1
control,0.450512
exposed,0.468798


In [22]:
# Build the per-group summary in a single pass instead of three separate
# pivot tables:
#   know_brand - number of rows with know_brand set (sum of the flag)
#   total      - number of rows in the group (non-null auction_id count)
#   rate       - mean of the know_brand flag
# Aggregating named columns avoids summing/averaging the auction_id strings.
summary_df = cleaned_df.groupby('experiment').agg(
    know_brand=('know_brand', 'sum'),
    total=('auction_id', 'count'),
    rate=('know_brand', 'mean'),
)
summary_df['not_know_brand'] = summary_df['total'] - summary_df['know_brand']
# Fix the column order used for display and by the following cells.
summary_df = summary_df[['know_brand', 'not_know_brand', 'total', 'rate']]
summary_df

Unnamed: 0_level_0,know_brand,not_know_brand,total,rate
experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
control,264,322,586,0.450512
exposed,308,349,657,0.468798


In [25]:
# Pull the per-group scalars out of the summary table.
# .loc[row, column] is used instead of chained [column][row] indexing.
exp_rate = summary_df.loc['exposed', 'rate']
cont_rate = summary_df.loc['control', 'rate']

# Count of know_brand responses in each group.
# Fix: cont_aware previously read the 'not_know_brand' column, so it held the
# control group's *unaware* count rather than the aware count.
exp_aware = summary_df.loc['exposed', 'know_brand']
cont_aware = summary_df.loc['control', 'know_brand']

# Group sizes.
exp_total = summary_df.loc['exposed', 'total']
cont_total = summary_df.loc['control', 'total']

In [26]:
# Absolute difference in rate: exposed group minus control group.
diff = exp_rate - cont_rate

In [27]:
# Report the unrounded rate difference (exposed minus control).
print(f'The difference between conversion rates of the test and control groups is {diff}.')

The difference between conversion rates of the test and control groups is 0.018285619295484168.
