# A/B Testing With Pandas

In [2]:
import pandas as pd

## Analyzing Ad Sources

In [6]:
# Read the ad-click export into a DataFrame.
ad_clicks = pd.read_csv('ad_clicks.csv')

# Peek at the first five rows to check which columns were parsed.
print(ad_clicks.head(n=5))

                                user_id utm_source           day  \
0  008b7c6c-7272-471e-b90e-930d548bd8d7     google  6 - Saturday   
1  009abb94-5e14-4b6c-bb1c-4f4df7aa7557   facebook    7 - Sunday   
2  00f5d532-ed58-4570-b6d2-768df5f41aed    twitter   2 - Tuesday   
3  011adc64-0f44-4fd9-a0bb-f1506d2ad439     google   2 - Tuesday   
4  012137e6-7ae7-4649-af68-205b4702169c   facebook    7 - Sunday   

  ad_click_timestamp experimental_group  
0               7:18                  A  
1                NaN                  B  
2                NaN                  A  
3                NaN                  B  
4                NaN                  B  


## Which ad platform is getting the most views?

In [7]:
# Rows per platform: count the user_id entries under each utm_source value
# (each counted row is treated here as one ad view).
views_per_platform = (
    ad_clicks
    .groupby('utm_source')['user_id']
    .count()
    .reset_index()
)

print(views_per_platform)

  utm_source  user_id
0      email      255
1   facebook      504
2     google      680
3    twitter      215


## What is the click rate for each source?

In [9]:
# Percentage of people who clicked, per UTM source.

# A row counts as a click when it has an ad_click_timestamp. The flag is
# stored on ad_clicks itself so it can be grouped on below.
ad_clicks['is_click'] = ad_clicks.ad_click_timestamp.notnull()

# One row per (utm_source, is_click) pair holding the number of user_id entries.
clicks_by_source = (
    ad_clicks
    .groupby(['utm_source', 'is_click'])['user_id']
    .count()
    .reset_index()
)

# Reshape to one row per source with a column per click outcome.
# pivot() leaves NaN for any (source, outcome) pair that never occurs and
# omits an outcome column entirely if that outcome is absent everywhere, so:
#   - reindex pins both outcome columns,
#   - fillna(0) turns "no such rows" into a count of zero,
#   - astype restores the integer dtype that NaN would have widened to float.
clicks_pivot = (
    clicks_by_source
    .pivot(columns='is_click', index='utm_source', values='user_id')
    .reindex(columns=[False, True])
    .fillna(0)
    .astype('int64')
    .reset_index()
    .rename(columns={False: 'not_clicked', True: 'clicked'})
)

# Every source present has at least one counted row, so the denominator
# (all rows for that source) is never zero.
clicks_pivot['percent_clicked'] = (
    clicks_pivot['clicked']
    / (clicks_pivot['not_clicked'] + clicks_pivot['clicked'])
) * 100

print(clicks_pivot)

is_click utm_source  not_clicked  clicked  percent_clicked
0             email          175       80        31.372549
1          facebook          324      180        35.714286
2            google          441      239        35.147059
3           twitter          149       66        30.697674


## Analyzing an A/B Test
   
   Were approximately the same number of people shown both ads?

In [10]:
# A/B analytics: size of each experiment arm, measured as the number of
# user_id entries per experimental_group value.
AB_test_shown = (
    ad_clicks
    .groupby('experimental_group')['user_id']
    .count()
    .reset_index()
)

print(AB_test_shown)

  experimental_group  user_id
0                  A      827
1                  B      827


## Total clicks per A/B test group

In [11]:
# Tally user_id entries for every (experimental_group, is_click) pair.
# Relies on the is_click column already being present on ad_clicks.
click_percentage = (
    ad_clicks
    .groupby(['experimental_group', 'is_click'])['user_id']
    .count()
    .reset_index()
)

# One row per experimental group, one column per click outcome; the boolean
# column labels are swapped for readable names.
click_percentage_pivot = (
    click_percentage
    .pivot(index='experimental_group', columns='is_click', values='user_id')
    .rename(columns={False: 'Not Clicked', True: 'Clicked'})
)

print(click_percentage_pivot)

is_click            Not Clicked  Clicked
experimental_group                      
A                           517      310
B                           572      255


## Clicks over time across A/B groups

### For group A:

In [12]:
# Change in click rate over the days of the week, group A.

# Split the click log by experiment arm. b_click is built here as well,
# even though the rest of this cell only analyses group A.
a_click = ad_clicks[ad_clicks.experimental_group == 'A']

b_click = ad_clicks[ad_clicks.experimental_group == 'B']

# One row per (day, is_click) pair holding the number of user_id entries.
a_click_by_day = (
    a_click
    .groupby(['day', 'is_click'])['user_id']
    .count()
    .reset_index()
)

# Reshape to one row per day with a False (no click) and a True (click) column.
# pivot() leaves NaN for a (day, outcome) pair that never occurs and drops an
# outcome column that is absent on every day, which would make the percentage
# NaN or raise KeyError below. Pin both columns, zero-fill the gaps, and
# restore the integer dtype.
a_click_by_day_pivot = (
    a_click_by_day
    .pivot(columns='is_click', index='day', values='user_id')
    .reindex(columns=[False, True])
    .fillna(0)
    .astype('int64')
    .reset_index()
)

# Share of each day's rows that were clicks, as a percentage.
a_click_by_day_pivot["click_percentage %"] = (
    a_click_by_day_pivot[True] * 100
    / (a_click_by_day_pivot[False] + a_click_by_day_pivot[True])
)

a_percentage_per_day = a_click_by_day_pivot[['day', 'click_percentage %']]

print(a_percentage_per_day)

is_click            day  click_percentage %
0            1 - Monday           38.053097
1           2 - Tuesday           36.134454
2         3 - Wednesday           30.645161
3          4 - Thursday           40.517241
4            5 - Friday           39.843750
5          6 - Saturday           38.135593
6            7 - Sunday           39.449541


### For group B:

In [13]:
# Change in click rate over the days of the week, group B.
# b_click (the group-B slice of ad_clicks) must already exist when this runs.

# One row per (day, is_click) pair holding the number of user_id entries.
b_click_by_day = (
    b_click
    .groupby(['day', 'is_click'])['user_id']
    .count()
    .reset_index()
)

# Reshape to one row per day with a False (no click) and a True (click) column.
# pivot() leaves NaN for a (day, outcome) pair that never occurs and drops an
# outcome column that is absent on every day, which would make the percentage
# NaN or raise KeyError below. Pin both columns, zero-fill the gaps, and
# restore the integer dtype.
b_click_by_day_pivot = (
    b_click_by_day
    .pivot(columns='is_click', index='day', values='user_id')
    .reindex(columns=[False, True])
    .fillna(0)
    .astype('int64')
    .reset_index()
)

# Share of each day's rows that were clicks, as a percentage.
b_click_by_day_pivot["click_percentage %"] = (
    b_click_by_day_pivot[True] * 100
    / (b_click_by_day_pivot[False] + b_click_by_day_pivot[True])
)

b_percentage_per_day = b_click_by_day_pivot[['day', 'click_percentage %']]


print(b_percentage_per_day)

is_click            day  click_percentage %
0            1 - Monday           28.318584
1           2 - Tuesday           37.815126
2         3 - Wednesday           28.225806
3          4 - Thursday           25.000000
4            5 - Friday           29.687500
5          6 - Saturday           35.593220
6            7 - Sunday           31.192661
