In [1]:
import numpy as np
import pandas as pd

In [3]:
# Load the raw ad-click log from the CSV that sits next to the notebook.
ad_clicks = pd.read_csv(filepath_or_buffer='ad_clicks.csv')

# Preview the first five rows to see which columns are available.
print(ad_clicks.head(5))

                                user_id utm_source           day  \
0  008b7c6c-7272-471e-b90e-930d548bd8d7     google  6 - Saturday   
1  009abb94-5e14-4b6c-bb1c-4f4df7aa7557   facebook    7 - Sunday   
2  00f5d532-ed58-4570-b6d2-768df5f41aed    twitter   2 - Tuesday   
3  011adc64-0f44-4fd9-a0bb-f1506d2ad439     google   2 - Tuesday   
4  012137e6-7ae7-4649-af68-205b4702169c   facebook    7 - Sunday   

  ad_click_timestamp experimental_group  
0               7:18                  A  
1                NaN                  B  
2                NaN                  A  
3                NaN                  B  
4                NaN                  B  


In [4]:
# Number of non-null user_id values per utm_source, flattened back into a
# two-column frame (utm_source, user_id).
most_viewed_platform = (
    ad_clicks
    .groupby('utm_source')['user_id']
    .count()
    .reset_index()
)

print(most_viewed_platform)

  utm_source  user_id
0      email      255
1   facebook      504
2     google      680
3    twitter      215


In [5]:
# Flag each row as a click when ad_click_timestamp holds a value (is not null).
ad_clicks['is_click'] = ad_clicks['ad_click_timestamp'].notnull()
print(ad_clicks.head(5))

                                user_id utm_source           day  \
0  008b7c6c-7272-471e-b90e-930d548bd8d7     google  6 - Saturday   
1  009abb94-5e14-4b6c-bb1c-4f4df7aa7557   facebook    7 - Sunday   
2  00f5d532-ed58-4570-b6d2-768df5f41aed    twitter   2 - Tuesday   
3  011adc64-0f44-4fd9-a0bb-f1506d2ad439     google   2 - Tuesday   
4  012137e6-7ae7-4649-af68-205b4702169c   facebook    7 - Sunday   

  ad_click_timestamp experimental_group  is_click  
0               7:18                  A      True  
1                NaN                  B     False  
2                NaN                  A     False  
3                NaN                  B     False  
4                NaN                  B     False  


In [7]:
# Count user_id values for every (utm_source, is_click) combination.
clicks_by_source = (
    ad_clicks
    .groupby(['utm_source', 'is_click'])['user_id']
    .count()
    .reset_index()
)
print(clicks_by_source)

  utm_source  is_click  user_id
0      email     False      175
1      email      True       80
2   facebook     False      324
3   facebook      True      180
4     google     False      441
5     google      True      239
6    twitter     False      149
7    twitter      True       66


In [8]:
# Reshape to one row per utm_source with the False / True click counts as
# separate columns; reset_index() turns utm_source back into a regular column.
clicks_pivot = (
    clicks_by_source
    .pivot(index='utm_source', columns='is_click', values='user_id')
    .reset_index()
)
print(clicks_pivot)

is_click utm_source  False  True
0             email    175    80
1          facebook    324   180
2            google    441   239
3           twitter    149    66


In [9]:
# Share of rows per source that were clicks: True / (True + False).
# Note: the stored value is a fraction in [0, 1], not a 0-100 percentage.
clicks_pivot['percent_clicked'] = (
    clicks_pivot[True]
    / (clicks_pivot[True] + clicks_pivot[False])
)
print(clicks_pivot)

is_click utm_source  False  True  percent_clicked
0             email    175    80         0.313725
1          facebook    324   180         0.357143
2            google    441   239         0.351471
3           twitter    149    66         0.306977


In [10]:
# Count user_id values in each experimental group.
count_for_group = (
    ad_clicks
    .groupby('experimental_group')['user_id']
    .count()
    .reset_index()
)
print(count_for_group)

  experimental_group  user_id
0                  A      827
1                  B      827


In [11]:
# Count user_id values for every (experimental_group, is_click) combination.
number_clicked = (
    ad_clicks
    .groupby(['experimental_group', 'is_click'])['user_id']
    .count()
    .reset_index()
)
print(number_clicked)

  experimental_group  is_click  user_id
0                  A     False      517
1                  A      True      310
2                  B     False      572
3                  B      True      255


In [12]:
# Reshape to one row per experimental group with the False / True click counts
# as separate columns; reset_index() restores experimental_group as a column.
number_clicked_pivot = (
    number_clicked
    .pivot(index='experimental_group', columns='is_click', values='user_id')
    .reset_index()
)
print(number_clicked_pivot)

is_click experimental_group  False  True
0                         A    517   310
1                         B    572   255


In [13]:
# Share of rows per experimental group that were clicks: True / (True + False).
# Note: the stored value is a fraction in [0, 1], not a 0-100 percentage.
number_clicked_pivot['percentage_clicked'] = (
    number_clicked_pivot[True]
    / (number_clicked_pivot[True] + number_clicked_pivot[False])
)
print(number_clicked_pivot)

is_click experimental_group  False  True  percentage_clicked
0                         A    517   310            0.374849
1                         B    572   255            0.308343


In [14]:
# Split the log into one frame per experimental group using a boolean row mask.
a_clicks = ad_clicks.loc[ad_clicks['experimental_group'] == 'A']
b_clicks = ad_clicks.loc[ad_clicks['experimental_group'] == 'B']

# Preview both subsets (group A first, then group B).
print(a_clicks.head(5))
print(b_clicks.head(5))

                                user_id utm_source            day  \
0  008b7c6c-7272-471e-b90e-930d548bd8d7     google   6 - Saturday   
2  00f5d532-ed58-4570-b6d2-768df5f41aed    twitter    2 - Tuesday   
5  013b0072-7b72-40e7-b698-98b4d0c9967f   facebook     1 - Monday   
6  0153d85b-7660-4c39-92eb-1e1acd023280     google   4 - Thursday   
7  01555297-d6e6-49ae-aeba-1b196fdbb09f     google  3 - Wednesday   

  ad_click_timestamp experimental_group  is_click  
0               7:18                  A      True  
2                NaN                  A     False  
5                NaN                  A     False  
6                NaN                  A     False  
7                NaN                  A     False  
                                 user_id utm_source            day  \
1   009abb94-5e14-4b6c-bb1c-4f4df7aa7557   facebook     7 - Sunday   
3   011adc64-0f44-4fd9-a0bb-f1506d2ad439     google    2 - Tuesday   
4   012137e6-7ae7-4649-af68-205b4702169c   facebook     7 - Sun

In [15]:
# Group A: count user_id values for every (is_click, day) combination.
a_clicks_by_day = (
    a_clicks
    .groupby(['is_click', 'day'])['user_id']
    .count()
    .reset_index()
)
print(a_clicks_by_day)

# Group B: the same aggregation applied to the B subset.
b_clicks_by_day = (
    b_clicks
    .groupby(['is_click', 'day'])['user_id']
    .count()
    .reset_index()
)
print(b_clicks_by_day)

    is_click            day  user_id
0      False     1 - Monday       70
1      False    2 - Tuesday       76
2      False  3 - Wednesday       86
3      False   4 - Thursday       69
4      False     5 - Friday       77
5      False   6 - Saturday       73
6      False     7 - Sunday       66
7       True     1 - Monday       43
8       True    2 - Tuesday       43
9       True  3 - Wednesday       38
10      True   4 - Thursday       47
11      True     5 - Friday       51
12      True   6 - Saturday       45
13      True     7 - Sunday       43
    is_click            day  user_id
0      False     1 - Monday       81
1      False    2 - Tuesday       74
2      False  3 - Wednesday       89
3      False   4 - Thursday       87
4      False     5 - Friday       90
5      False   6 - Saturday       76
6      False     7 - Sunday       75
7       True     1 - Monday       32
8       True    2 - Tuesday       45
9       True  3 - Wednesday       35
10      True   4 - Thursday       29
1

In [16]:
# Group A: one row per day with the False / True click counts as columns.
a_clicks_day_pivot = (
    a_clicks_by_day
    .pivot(index='day', columns='is_click', values='user_id')
    .reset_index()
)

# Fraction of group A rows per day that were clicks: True / (False + True).
a_clicks_day_pivot['percentage_clicked'] = a_clicks_day_pivot[True].div(
    a_clicks_day_pivot[False] + a_clicks_day_pivot[True]
)

print(a_clicks_day_pivot)

is_click            day  False  True  percentage_clicked
0            1 - Monday     70    43            0.380531
1           2 - Tuesday     76    43            0.361345
2         3 - Wednesday     86    38            0.306452
3          4 - Thursday     69    47            0.405172
4            5 - Friday     77    51            0.398438
5          6 - Saturday     73    45            0.381356
6            7 - Sunday     66    43            0.394495


In [17]:
# Group B: one row per day with the False / True click counts as columns.
b_clicks_day_pivot = (
    b_clicks_by_day
    .pivot(index='day', columns='is_click', values='user_id')
    .reset_index()
)

# Fraction of group B rows per day that were clicks: True / (True + False).
b_clicks_day_pivot['percentage_clicked'] = b_clicks_day_pivot[True].div(
    b_clicks_day_pivot[True] + b_clicks_day_pivot[False]
)

print(b_clicks_day_pivot)

is_click            day  False  True  percentage_clicked
0            1 - Monday     81    32            0.283186
1           2 - Tuesday     74    45            0.378151
2         3 - Wednesday     89    35            0.282258
3          4 - Thursday     87    29            0.250000
4            5 - Friday     90    38            0.296875
5          6 - Saturday     76    42            0.355932
6            7 - Sunday     75    34            0.311927


In [19]:
# Keep only the day label and the click fraction for group A.
a_percentage_by_day = a_clicks_day_pivot.loc[:, ['day', 'percentage_clicked']]
print(a_percentage_by_day)

is_click            day  percentage_clicked
0            1 - Monday            0.380531
1           2 - Tuesday            0.361345
2         3 - Wednesday            0.306452
3          4 - Thursday            0.405172
4            5 - Friday            0.398438
5          6 - Saturday            0.381356
6            7 - Sunday            0.394495


In [20]:
# Keep only the day label and the click fraction for group B.
b_percentage_by_day = b_clicks_day_pivot.loc[:, ['day', 'percentage_clicked']]
print(b_percentage_by_day)

is_click            day  percentage_clicked
0            1 - Monday            0.283186
1           2 - Tuesday            0.378151
2         3 - Wednesday            0.282258
3          4 - Thursday            0.250000
4            5 - Friday            0.296875
5          6 - Saturday            0.355932
6            7 - Sunday            0.311927
