# A/B Testing for ShoeFly.com
Our favorite online shoe store, ShoeFly.com, is performing an A/B test. They have two different versions of an ad, which they have placed in emails, as well as in banner ads on Facebook, Twitter, and Google. They want to know how the two ads are performing on each of the different platforms on each day of the week. Help them analyze the data using aggregate measures.

### Analyzing Ad Sources

In [1]:
import pandas as pd

# Load the ad-click data; the relative path is resolved against the
# current working directory.
ad_clicks = pd.read_csv(filepath_or_buffer='ad_clicks.csv')

Unnamed: 0,user_id,utm_source,day,ad_click_timestamp,experimental_group
0,008b7c6c-7272-471e-b90e-930d548bd8d7,google,6 - Saturday,7:18,A
1,009abb94-5e14-4b6c-bb1c-4f4df7aa7557,facebook,7 - Sunday,,B
2,00f5d532-ed58-4570-b6d2-768df5f41aed,twitter,2 - Tuesday,,A
3,011adc64-0f44-4fd9-a0bb-f1506d2ad439,google,2 - Tuesday,,B
4,012137e6-7ae7-4649-af68-205b4702169c,facebook,7 - Sunday,,B
...,...,...,...,...,...
1649,fe8b5236-78f6-4192-9da6-a76bba67cfe6,twitter,7 - Sunday,,A
1650,fed3db6d-8c92-40e3-a4fb-1fb9d7337eb1,facebook,5 - Friday,,B
1651,ff3a22ff-521c-478c-87ca-7dc7b8f34372,twitter,3 - Wednesday,,B
1652,ff3af0d6-b092-4c4d-9f2e-2bdd8f7c0732,google,1 - Monday,22:57,A


In [2]:
# Preview the first five rows (five is head()'s default row count).
ad_clicks.head()

Unnamed: 0,user_id,utm_source,day,ad_click_timestamp,experimental_group
0,008b7c6c-7272-471e-b90e-930d548bd8d7,google,6 - Saturday,7:18,A
1,009abb94-5e14-4b6c-bb1c-4f4df7aa7557,facebook,7 - Sunday,,B
2,00f5d532-ed58-4570-b6d2-768df5f41aed,twitter,2 - Tuesday,,A
3,011adc64-0f44-4fd9-a0bb-f1506d2ad439,google,2 - Tuesday,,B
4,012137e6-7ae7-4649-af68-205b4702169c,facebook,7 - Sunday,,B


In [4]:
# Tally the user_id entries recorded for each traffic source (utm_source).
source = (
    ad_clicks
    .groupby('utm_source')['user_id']
    .count()
    .reset_index()
)
source

Unnamed: 0,utm_source,user_id
0,email,255
1,facebook,504
2,google,680
3,twitter,215


In [5]:
# A row counts as a click exactly when it carries a click timestamp.
ad_clicks['is_click'] = ad_clicks['ad_click_timestamp'].notna()
ad_clicks

Unnamed: 0,user_id,utm_source,day,ad_click_timestamp,experimental_group,is_click
0,008b7c6c-7272-471e-b90e-930d548bd8d7,google,6 - Saturday,7:18,A,True
1,009abb94-5e14-4b6c-bb1c-4f4df7aa7557,facebook,7 - Sunday,,B,False
2,00f5d532-ed58-4570-b6d2-768df5f41aed,twitter,2 - Tuesday,,A,False
3,011adc64-0f44-4fd9-a0bb-f1506d2ad439,google,2 - Tuesday,,B,False
4,012137e6-7ae7-4649-af68-205b4702169c,facebook,7 - Sunday,,B,False
...,...,...,...,...,...,...
1649,fe8b5236-78f6-4192-9da6-a76bba67cfe6,twitter,7 - Sunday,,A,False
1650,fed3db6d-8c92-40e3-a4fb-1fb9d7337eb1,facebook,5 - Friday,,B,False
1651,ff3a22ff-521c-478c-87ca-7dc7b8f34372,twitter,3 - Wednesday,,B,False
1652,ff3af0d6-b092-4c4d-9f2e-2bdd8f7c0732,google,1 - Monday,22:57,A,True


In [7]:
# Count user_id entries for every (utm_source, is_click) combination.
clicks_by_source = (
    ad_clicks
    .groupby(['utm_source', 'is_click'])['user_id']
    .count()
    .reset_index()
)
clicks_by_source

Unnamed: 0,utm_source,is_click,user_id
0,email,False,175
1,email,True,80
2,facebook,False,324
3,facebook,True,180
4,google,False,441
5,google,True,239
6,twitter,False,149
7,twitter,True,66


In [8]:
# Reshape to one row per utm_source with one column per is_click value.
# pivot() leaves NaN wherever a (utm_source, is_click) pair never occurs in
# clicks_by_source. Such a gap means "zero users", so fill it with 0 and
# restore the integer dtype; a NaN here would otherwise propagate into any
# ratio computed from these counts.
clicks_pivot = (
    clicks_by_source
    .pivot(index='utm_source', columns='is_click', values='user_id')
    .fillna(0)
    .astype('int64')
)
clicks_pivot

is_click,False,True
utm_source,Unnamed: 1_level_1,Unnamed: 2_level_1
email,175,80
facebook,324,180
google,441,239
twitter,149,66


In [12]:
# Click rate per source: clicks / (non-clicks + clicks).
# Despite the column name, the value is stored as a fraction (not multiplied by 100).
clicks_pivot['percent_clicked'] = (
    clicks_pivot[True]
    / (clicks_pivot[False] + clicks_pivot[True])
)
clicks_pivot

is_click,False,True,percent_clicked
utm_source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
email,175,80,0.313725
facebook,324,180,0.357143
google,441,239,0.351471
twitter,149,66,0.306977


### Analyzing an A/B Test

In [13]:
# Number of user_id entries in each experimental group (A vs. B).
ABcounting = (
    ad_clicks
    .groupby('experimental_group')['user_id']
    .count()
    .reset_index()
)
ABcounting

Unnamed: 0,experimental_group,user_id
0,A,827
1,B,827


In [17]:
# Count user_id entries for every (experimental_group, is_click) combination.
ABclickCount = (
    ad_clicks
    .groupby(['experimental_group', 'is_click'])['user_id']
    .count()
    .reset_index()
)

# One row per experimental group, one column per is_click value.
# pivot() yields NaN for a combination with no rows; that gap means a count
# of zero, so fill it with 0 and keep the counts as integers.
ABclickCountPivot = (
    ABclickCount
    .pivot(index='experimental_group', columns='is_click', values='user_id')
    .fillna(0)
    .astype('int64')
)
ABclickCountPivot

is_click,False,True
experimental_group,Unnamed: 1_level_1,Unnamed: 2_level_1
A,517,310
B,572,255


In [20]:
# Restrict to experimental group A, then count user_id entries per (day, is_click).
# NOTE(review): this cell mirrors the group B cell; a shared helper taking the
# group label would remove the duplication.
a_database = ad_clicks[ad_clicks.experimental_group == 'A']
a_clicksGen = a_database.groupby(['day', 'is_click'])['user_id'].count().reset_index()

# One row per day, one column per is_click value. A day with no clicks (or no
# non-clicks) has no row in a_clicksGen for that combination, so pivot() leaves
# NaN there; treat it as a zero count so the rate below is 0.0 / 1.0, not NaN.
a_clicks = (
    a_clicksGen
    .pivot(index='day', columns='is_click', values='user_id')
    .fillna(0)
    .astype('int64')
)

# Click rate per day, stored as a fraction: clicks / (clicks + non-clicks).
a_clicks['percent_clicked'] = a_clicks[True] / (a_clicks[True] + a_clicks[False])
a_clicks

is_click,False,True,percent_clicked
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1 - Monday,70,43,0.380531
2 - Tuesday,76,43,0.361345
3 - Wednesday,86,38,0.306452
4 - Thursday,69,47,0.405172
5 - Friday,77,51,0.398438
6 - Saturday,73,45,0.381356
7 - Sunday,66,43,0.394495


In [21]:
# Restrict to experimental group B, then count user_id entries per (day, is_click).
# NOTE(review): this cell mirrors the group A cell; a shared helper taking the
# group label would remove the duplication.
b_database = ad_clicks[ad_clicks.experimental_group == 'B']
b_clicksGen = b_database.groupby(['day', 'is_click'])['user_id'].count().reset_index()

# One row per day, one column per is_click value. A day with no clicks (or no
# non-clicks) has no row in b_clicksGen for that combination, so pivot() leaves
# NaN there; treat it as a zero count so the rate below is 0.0 / 1.0, not NaN.
b_clicks = (
    b_clicksGen
    .pivot(index='day', columns='is_click', values='user_id')
    .fillna(0)
    .astype('int64')
)

# Click rate per day, stored as a fraction: clicks / (clicks + non-clicks).
b_clicks['percent_clicked'] = b_clicks[True] / (b_clicks[True] + b_clicks[False])
b_clicks

is_click,False,True,percent_clicked
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1 - Monday,81,32,0.283186
2 - Tuesday,74,45,0.378151
3 - Wednesday,89,35,0.282258
4 - Thursday,87,29,0.25
5 - Friday,90,38,0.296875
6 - Saturday,76,42,0.355932
7 - Sunday,75,34,0.311927
