In [4]:
import pandas as pd
ad_clicks = pd.read_csv('ad_clicks.csv')

## Examine Data

In [6]:
print(ad_clicks.info())
print(ad_clicks.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1654 entries, 0 to 1653
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   user_id             1654 non-null   object
 1   utm_source          1654 non-null   object
 2   day                 1654 non-null   object
 3   ad_click_timestamp  565 non-null    object
 4   experimental_group  1654 non-null   object
dtypes: object(5)
memory usage: 64.7+ KB
None
                                user_id utm_source           day  \
0  008b7c6c-7272-471e-b90e-930d548bd8d7     google  6 - Saturday   
1  009abb94-5e14-4b6c-bb1c-4f4df7aa7557   facebook    7 - Sunday   
2  00f5d532-ed58-4570-b6d2-768df5f41aed    twitter   2 - Tuesday   
3  011adc64-0f44-4fd9-a0bb-f1506d2ad439     google   2 - Tuesday   
4  012137e6-7ae7-4649-af68-205b4702169c   facebook    7 - Sunday   

  ad_click_timestamp experimental_group  
0               7:18                  A  
1                Na

## Which Platform is getting more views?

In [7]:
utm_source_data = ad_clicks.groupby('utm_source').user_id.count().reset_index()
print(utm_source_data)

  utm_source  user_id
0      email      255
1   facebook      504
2     google      680
3    twitter      215


## Determine Clicked Ad
Modification of `ad_clicks`. If `ad_clicked_timestamp` is not null, user clicked on the ad. 

In [10]:
ad_clicks['is_click'] = ~ad_clicks.ad_click_timestamp.isnull()
print(ad_clicks[['user_id','utm_source','is_click']].head())

                                user_id utm_source  is_click
0  008b7c6c-7272-471e-b90e-930d548bd8d7     google      True
1  009abb94-5e14-4b6c-bb1c-4f4df7aa7557   facebook     False
2  00f5d532-ed58-4570-b6d2-768df5f41aed    twitter     False
3  011adc64-0f44-4fd9-a0bb-f1506d2ad439     google     False
4  012137e6-7ae7-4649-af68-205b4702169c   facebook     False


## Determine Clicks by Source
Determine percent of people who clicked on ads from each `utm_source`

In [14]:
clicks_by_source = ad_clicks.groupby(['utm_source', 'is_click']).user_id.count().reset_index()
clicks_pivot = clicks_by_source.pivot(columns='is_click', index='utm_source', values='user_id')
clicks_pivot['percent_clicked'] = clicks_pivot[True]/(clicks_pivot[True] + clicks_pivot[False])
print(clicks_pivot)

is_click    False  True  percent_clicked
utm_source                              
email         175    80         0.313725
facebook      324   180         0.357143
google        441   239         0.351471
twitter       149    66         0.306977


## A/B Test
Wew approximately the same number of people shown both ads?

In [15]:
n_experimental_group = ad_clicks.groupby('experimental_group').user_id.count().reset_index()
print(n_experimental_group)

  experimental_group  user_id
0                  A      827
1                  B      827


## Check A/B Percentage

In [19]:
n_exp_click = ad_clicks.groupby(['experimental_group', 'is_click']).user_id.count().reset_index()
n_exp_click_pivot = n_exp_click.pivot(columns='is_click', index='experimental_group', values='user_id')
n_exp_click_pivot['percentage_clicked'] = n_exp_click_pivot[True]/(n_exp_click_pivot[True] + n_exp_click_pivot[False])
print(n_exp_click_pivot)

is_click            False  True  percentage_clicked
experimental_group                                 
A                     517   310            0.374849
B                     572   255            0.308343


## Separate Ads

In [21]:
a_clicks = ad_clicks[ad_clicks['experimental_group'] == 'A']
b_clicks = ad_clicks[ad_clicks.experimental_group == 'B']
print(a_clicks[['user_id','experimental_group','utm_source']].head())
print(b_clicks[['user_id','experimental_group','utm_source']].head())

                                user_id experimental_group utm_source
0  008b7c6c-7272-471e-b90e-930d548bd8d7                  A     google
2  00f5d532-ed58-4570-b6d2-768df5f41aed                  A    twitter
5  013b0072-7b72-40e7-b698-98b4d0c9967f                  A   facebook
6  0153d85b-7660-4c39-92eb-1e1acd023280                  A     google
7  01555297-d6e6-49ae-aeba-1b196fdbb09f                  A     google
                                 user_id experimental_group utm_source
1   009abb94-5e14-4b6c-bb1c-4f4df7aa7557                  B   facebook
3   011adc64-0f44-4fd9-a0bb-f1506d2ad439                  B     google
4   012137e6-7ae7-4649-af68-205b4702169c                  B   facebook
9   01a210c3-fde0-4e6f-8efd-4f0e38730ae6                  B      email
10  01adb2e7-f711-4ae4-a7c6-29f48457eea1                  B     google


## Percent of users who clicked on Ads by day

In [23]:
a_clicks_day = a_clicks.groupby(['is_click','day']).user_id.count().reset_index()
a_clicks_day_pivot = a_clicks_day.pivot(columns='is_click', index='day', values='user_id')
a_clicks_day_pivot['percentage_clicked'] = a_clicks_day_pivot[True]/(a_clicks_day_pivot[True] + a_clicks_day_pivot[False])
print(a_clicks_day_pivot)

is_click       False  True  percentage_clicked
day                                           
1 - Monday        70    43            0.380531
2 - Tuesday       76    43            0.361345
3 - Wednesday     86    38            0.306452
4 - Thursday      69    47            0.405172
5 - Friday        77    51            0.398438
6 - Saturday      73    45            0.381356
7 - Sunday        66    43            0.394495


In [26]:
b_clicks_day = b_clicks.groupby(['is_click','day']).user_id.count().reset_index()
b_clicks_day_pivot = b_clicks_day.pivot(columns='is_click', index='day', values='user_id')
b_clicks_day_pivot['percentage_clicked'] = b_clicks_day_pivot[True]/(b_clicks_day_pivot[True] + b_clicks_day_pivot[False])
print(b_clicks_day_pivot)

is_click       False  True  percentage_clicked
day                                           
1 - Monday        81    32            0.283186
2 - Tuesday       74    45            0.378151
3 - Wednesday     89    35            0.282258
4 - Thursday      87    29            0.250000
5 - Friday        90    38            0.296875
6 - Saturday      76    42            0.355932
7 - Sunday        75    34            0.311927


## Recommended Ad?

In [32]:
print('Clicked on A: ' + str(format(100*a_clicks_day_pivot.percentage_clicked.mean(), '.2f')) + '%')
print('Clicked on B: ' + str(format(100*b_clicks_day_pivot.percentage_clicked.mean(), '.2f')) + '%')

Clicked on A: 37.54%
Clicked on B: 30.83%
