# A/B testing for ShoeFly.com

In [None]:
# Before analyzing anything, I import pandas, load the data, and examine the first rows

import pandas as pd

# Read the ad click records from ad_clicks.csv into a DataFrame.
ad_clicks = pd.read_csv('ad_clicks.csv')

# Show the first ten rows to get a feel for the columns and values.
preview_row_count = 10
print(ad_clicks.head(n=preview_row_count))

#### Analyzing ad sources

##### How many views came from each utm_source?

In [None]:
# Count the user_id entries recorded for each utm_source and print them as a table.
print(
    ad_clicks
    .groupby('utm_source')['user_id']
    .count()
    .reset_index()
)

##### I want to create a new column called is_click, which is True if ad_click_timestamp is not null and False otherwise:

In [None]:
# A row counts as a click exactly when ad_click_timestamp holds a value.
ad_clicks['is_click'] = ad_clicks['ad_click_timestamp'].notnull()

##### I want to know the percent of people who clicked on ads from each utm_source:

In [None]:
# Count the user_ids in every (utm_source, is_click) combination.
# reset_index() turns the grouped counts back into a regular DataFrame,
# which is stored in clicks_by_source.

clicks_by_source = (
    ad_clicks
    .groupby(['utm_source', 'is_click'])['user_id']
    .count()
    .reset_index()
)

##### Now I want to pivot the data so that the columns are is_click (either True or False), the index is utm_source, and the values are user_id:

In [None]:
# Reshape the counts: one row per utm_source, one column per is_click value
# (False / True), with the user_id counts as the cell values.
# The result is stored in clicks_pivot.

clicks_pivot = (
    clicks_by_source
    .pivot(index='utm_source', columns='is_click', values='user_id')
    .reset_index()
)

##### Next, I want to create a new column in clicks_pivot called percent_clicked, which is equal to the percent of users who clicked on the ad from each utm_source:

In [None]:
# pivot() leaves NaN where a utm_source has no rows for one of the is_click
# values; treat those as a count of 0 so the ratio does not become NaN.
source_clicked = clicks_pivot[True].fillna(0)
source_not_clicked = clicks_pivot[False].fillna(0)

# Stored as a fraction (clicked / total views); it is not multiplied by 100.
clicks_pivot['percent_clicked'] = source_clicked / (source_not_clicked + source_clicked)

#### Analyzing an A/B test

##### Were approximately the same number of people shown both ads?

In [None]:
# The experimental_group column records whether a user was shown ad A or ad B,
# so counting user_ids per group gives the size of each arm of the test.

print(
    ad_clicks
    .groupby('experimental_group')['user_id']
    .count()
    .reset_index()
)

##### Now I use the column is_click that I defined earlier, to see if a greater percentage of users clicked on ad A or ad B:

In [None]:
# Count clicked / not-clicked users per experimental group and pivot so that
# each group is one row with a False column and a True column.
group_click_counts = (
    ad_clicks
    .groupby(['experimental_group', 'is_click'])['user_id']
    .count()
    .reset_index()
    .pivot(index='experimental_group', columns='is_click', values='user_id')
    .fillna(0)  # a missing group/is_click combination means a count of 0
    .reset_index()
)

# The raw counts alone do not answer the question, so add the share of users
# in each group who clicked (a fraction: clicked / total shown).
group_click_counts['percent_clicked'] = group_click_counts[True] / (
    group_click_counts[True] + group_click_counts[False]
)

print(group_click_counts)

##### Next, I want to check whether the A/B test clicks changed by day of the week:

In [None]:
def _percent_clicked_by_day(group_clicks):
    """Return per-day click counts and click share for one experimental group.

    Groups ``group_clicks`` by ``is_click`` and ``day``, counts ``user_id``
    entries, and pivots so each day is one row with a ``False`` and a ``True``
    column. A ``percent_clicked`` column (clicked / total, as a fraction) is
    added before the table is returned.
    """
    day_counts = (
        group_clicks
        .groupby(['is_click', 'day'])['user_id']
        .count()
        .reset_index()
        .pivot(index='day', columns='is_click', values='user_id')
        .fillna(0)  # a day with no clicks (or no non-clicks) counts as 0, not NaN
        .reset_index()
    )
    day_counts['percent_clicked'] = day_counts[True] / (day_counts[True] + day_counts[False])
    return day_counts


# Split the data into one DataFrame per experimental group: a_clicks and b_clicks

a_clicks = ad_clicks[ad_clicks.experimental_group == 'A']

b_clicks = ad_clicks[ad_clicks.experimental_group == 'B']

# For each group, calculate the share of users who clicked on the ad by day

a_clicks_pivot = _percent_clicked_by_day(a_clicks)

print(a_clicks_pivot)

b_clicks_pivot = _percent_clicked_by_day(b_clicks)