# A/B Testing for ShoeFly.com

Our favorite online shoe store, ShoeFly.com, is performing an A/B test.
They have two different versions of an ad, which they have placed in emails,
as well as in banner ads on Facebook, Twitter, and Google.
They want to know how the two ads are performing on each of the different platforms on each day of the week. 
We are here to help them analyze the data using aggregate measures.

In [1]:
import pandas as pd

In [5]:
# Load the ad records from the CSV file into a DataFrame, then preview the
# first ten rows to see which columns are available.
ad_clicks = pd.read_csv(filepath_or_buffer='ad_click.csv')
ad_clicks.head(n=10)

Unnamed: 0,user_id,utm_source,day,ad_click_timestamp,experimental_group
0,008b7c6c-7272-471e-b90e-930d548bd8d7,google,6 - Saturday,7:18,A
1,009abb94-5e14-4b6c-bb1c-4f4df7aa7557,facebook,7 - Sunday,,B
2,00f5d532-ed58-4570-b6d2-768df5f41aed,twitter,2 - Tuesday,,A
3,011adc64-0f44-4fd9-a0bb-f1506d2ad439,google,2 - Tuesday,,B
4,012137e6-7ae7-4649-af68-205b4702169c,facebook,7 - Sunday,,B
5,013b0072-7b72-40e7-b698-98b4d0c9967f,facebook,1 - Monday,,A
6,0153d85b-7660-4c39-92eb-1e1acd023280,google,4 - Thursday,,A
7,01555297-d6e6-49ae-aeba-1b196fdbb09f,google,3 - Wednesday,,A
8,018cea61-19ea-4119-895b-1a4309ccb148,email,1 - Monday,18:33,A
9,01a210c3-fde0-4e6f-8efd-4f0e38730ae6,email,2 - Tuesday,15:21,B


In [8]:
# Our manager wants to know which ad platform is getting us the most views,
# so count the user_id entries recorded for each utm_source.
print(
    ad_clicks.groupby('utm_source')['user_id']
    .count()
    .reset_index()
)

  utm_source  user_id
0      email      255
1   facebook      504
2     google      680
3    twitter      215


In [17]:
# If the column ad_click_timestamp is not null, then someone actually clicked
# on the ad that was displayed. Store that as a boolean is_click column
# (True = clicked, False = viewed without clicking).
ad_clicks['is_click'] = ad_clicks.ad_click_timestamp.notnull()

ad_clicks.head(10)

Unnamed: 0,user_id,utm_source,day,ad_click_timestamp,experimental_group,is_click
0,008b7c6c-7272-471e-b90e-930d548bd8d7,google,6 - Saturday,7:18,A,True
1,009abb94-5e14-4b6c-bb1c-4f4df7aa7557,facebook,7 - Sunday,,B,False
2,00f5d532-ed58-4570-b6d2-768df5f41aed,twitter,2 - Tuesday,,A,False
3,011adc64-0f44-4fd9-a0bb-f1506d2ad439,google,2 - Tuesday,,B,False
4,012137e6-7ae7-4649-af68-205b4702169c,facebook,7 - Sunday,,B,False
5,013b0072-7b72-40e7-b698-98b4d0c9967f,facebook,1 - Monday,,A,False
6,0153d85b-7660-4c39-92eb-1e1acd023280,google,4 - Thursday,,A,False
7,01555297-d6e6-49ae-aeba-1b196fdbb09f,google,3 - Wednesday,,A,False
8,018cea61-19ea-4119-895b-1a4309ccb148,email,1 - Monday,18:33,A,True
9,01a210c3-fde0-4e6f-8efd-4f0e38730ae6,email,2 - Tuesday,15:21,B,True


In [32]:
# We want to know the percent of people who clicked on ads from each utm_source.
# Start by counting the users for every (utm_source, is_click) combination.
click_by_source = (
    ad_clicks.groupby(['utm_source', 'is_click'])
    .user_id.count()
    .reset_index()
)
# Now pivot the data so that the columns are is_click (either True or False),
# the index is utm_source, and the values are the user_id counts.
# A source that has no rows for one of the two outcomes would be left as NaN
# by the pivot and would make its click rate NaN; a missing combination
# simply means a count of 0, so fill it in as such.
clicks_pivot = click_by_source.pivot(
    columns='is_click', index='utm_source', values='user_id'
).fillna(0)
# Note: despite its name, percent_clicked is a fraction between 0 and 1
# (clicks divided by total views), not a value on a 0-100 scale.
clicks_pivot['percent_clicked'] = clicks_pivot[True] / (
    clicks_pivot[True] + clicks_pivot[False]
)
clicks_pivot

is_click,False,True,percent_clicked
utm_source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
email,175,80,0.313725
facebook,324,180,0.357143
google,441,239,0.351471
twitter,149,66,0.306977


In [47]:
# The Product Manager for the A/B test thinks that the clicks might have
# changed by day of the week, so split the records by experimental group.
a_clicks = ad_clicks[ad_clicks.experimental_group == 'A']
b_clicks = ad_clicks[ad_clicks.experimental_group == 'B']

# For group A, count the users for every (day, is_click) combination.
day_click_a = (
    a_clicks.groupby(['day', 'is_click'])
    .user_id.count()
    .reset_index()
)
# Pivot so that each day is a row and is_click (False / True) are the columns.
# A day with no clicks (or no non-clicks) would otherwise be NaN and make the
# click rate NaN; a missing combination means a count of 0.
day_click_a_pivot = day_click_a.pivot(
    columns='is_click', index='day', values='user_id'
).fillna(0)
# percent_A is the fraction (0-1) of group A views on that day that were clicked.
day_click_a_pivot['percent_A'] = day_click_a_pivot[True] / (
    day_click_a_pivot[False] + day_click_a_pivot[True]
)
day_click_a_pivot

is_click,False,True,percent_A
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1 - Monday,70,43,0.380531
2 - Tuesday,76,43,0.361345
3 - Wednesday,86,38,0.306452
4 - Thursday,69,47,0.405172
5 - Friday,77,51,0.398438
6 - Saturday,73,45,0.381356
7 - Sunday,66,43,0.394495


In [48]:
# For group B (b_clicks holds the rows whose experimental_group is 'B'),
# count the users for every (day, is_click) combination.
day_click_b = (
    b_clicks.groupby(['day', 'is_click'])
    .user_id.count()
    .reset_index()
)
# Pivot so that each day is a row and is_click (False / True) are the columns.
# A day with no clicks (or no non-clicks) would otherwise be NaN and make the
# click rate NaN; a missing combination means a count of 0.
day_click_b_pivot = day_click_b.pivot(
    columns='is_click', index='day', values='user_id'
).fillna(0)
# percent_B is the fraction (0-1) of group B views on that day that were clicked.
day_click_b_pivot['percent_B'] = day_click_b_pivot[True] / (
    day_click_b_pivot[False] + day_click_b_pivot[True]
)
day_click_b_pivot

is_click,False,True,percent_B
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1 - Monday,81,32,0.283186
2 - Tuesday,74,45,0.378151
3 - Wednesday,89,35,0.282258
4 - Thursday,87,29,0.25
5 - Friday,90,38,0.296875
6 - Saturday,76,42,0.355932
7 - Sunday,75,34,0.311927


In [49]:
# Comparing the daily click rates of Test A and Test B,
# Ad A has a higher click rate than Ad B on every day except Tuesday,
# so Version A is the better version of the ad.
# I therefore recommend Version A of the ad.