# A/B Testing for marketing campaigns

A/B testing is used to find the best marketing strategies. 

### Import elements

In [1]:
# import librairies
import pandas as pd
import datetime
from datetime import date, timedelta
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.templates.default = "plotly_white"

In [4]:
# import data
control_group = pd.read_csv("control_group.csv", sep = ";")
test_group = pd.read_csv("test_group.csv", sep = ";")

In [5]:
# familiariser avec les données
print(control_group.head())
print(test_group.head())

      Campaign Name       Date  Spend [USD]  # of Impressions     Reach  \
0  Control Campaign  1.08.2019         2280           82702.0   56930.0   
1  Control Campaign  2.08.2019         1757          121040.0  102513.0   
2  Control Campaign  3.08.2019         2343          131711.0  110862.0   
3  Control Campaign  4.08.2019         1940           72878.0   61235.0   
4  Control Campaign  5.08.2019         1835               NaN       NaN   

   # of Website Clicks  # of Searches  # of View Content  # of Add to Cart  \
0               7016.0         2290.0             2159.0            1819.0   
1               8110.0         2033.0             1841.0            1219.0   
2               6508.0         1737.0             1549.0            1134.0   
3               3065.0         1042.0              982.0            1183.0   
4                  NaN            NaN                NaN               NaN   

   # of Purchase  
0          618.0  
1          511.0  
2          372.0  
3   

## Data preparation

### Missing values

In [6]:
control_group.columns = ["Campaign Name", "Date", "Amount Spent", "Number of Impressions", "Reach", "Website Clicks", "Searches Received",
                         "Content Viewed", "Added to Cart", "Purchases"]

test_group.columns = ["Campaign Name", "Date", "Amount Spent", "Number of Impressions", "Reach", "Website Clicks", "Searches Received", 
                      "Content Viewed", "Added to Cart", "Purchases"]

### Missing values

In [7]:
## identify missing values
print(control_group.isnull().sum())
print(test_group.isnull().sum())

Campaign Name            0
Date                     0
Amount Spent             0
Number of Impressions    1
Reach                    1
Website Clicks           1
Searches Received        1
Content Viewed           1
Added to Cart            1
Purchases                1
dtype: int64
Campaign Name            0
Date                     0
Amount Spent             0
Number of Impressions    0
Reach                    0
Website Clicks           0
Searches Received        0
Content Viewed           0
Added to Cart            0
Purchases                0
dtype: int64


In [8]:
## impute missing values for control_group dataframe
control_group["Number of Impressions"].fillna(value=control_group["Number of Impressions"].mean(), inplace=True)
control_group["Reach"].fillna(value=control_group["Reach"].mean(), inplace=True)
control_group["Website Clicks"].fillna(value=control_group["Website Clicks"].mean(), inplace=True)
control_group["Searches Received"].fillna(value=control_group["Searches Received"].mean(), inplace=True)
control_group["Content Viewed"].fillna(value=control_group["Content Viewed"].mean(), inplace=True)
control_group["Added to Cart"].fillna(value=control_group["Added to Cart"].mean(), inplace=True)
control_group["Purchases"].fillna(value=control_group["Purchases"].mean(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  control_group["Number of Impressions"].fillna(value=control_group["Number of Impressions"].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  control_group["Reach"].fillna(value=control_group["Reach"].mean(), inplace=True)
The behavior will change in pandas 3.0

### Merge two dataframes

In [11]:
## concatenate two dataframe
ab_data = control_group.merge(test_group, 
                             how="outer").sort_values(["Date"])
ab_data = ab_data.reset_index(drop=True)
print(ab_data.head())

      Campaign Name        Date  Amount Spent  Number of Impressions    Reach  \
0  Control Campaign   1.08.2019          2280                82702.0  56930.0   
1     Test Campaign   1.08.2019          3008                39550.0  35820.0   
2  Control Campaign  10.08.2019          2149               117624.0  91257.0   
3     Test Campaign  10.08.2019          2790                95054.0  79632.0   
4  Control Campaign  11.08.2019          2490               115247.0  95843.0   

   Website Clicks  Searches Received  Content Viewed  Added to Cart  Purchases  
0          7016.0             2290.0          2159.0         1819.0      618.0  
1          3038.0             1946.0          1069.0          894.0      255.0  
2          2277.0             2475.0          1984.0         1629.0      734.0  
3          8125.0             2312.0          1804.0          424.0      275.0  
4          8137.0             2941.0          2486.0         1887.0      475.0  


  ab_data = control_group.merge(test_group,


## Explore data

In [12]:
## count the number of campaign
## it is important to verify if the number of campaign A is equal to the number of campaign B
print(ab_data["Campaign Name"].value_counts())

Campaign Name
Control Campaign    30
Test Campaign       30
Name: count, dtype: int64


## A/B Testing : Explore pattern (visualization, etc...)

First of all, we need to analyze the relationship between the number of impressions we got and the amount spent for each campaigns. Then, we focus on the amount of searches, website's clicks, content viewed, added cart, amount spent and purchases. 

In [13]:
## Amoutn spent
figure = px.scatter(data_frame = ab_data, 
                    x="Number of Impressions",
                    y="Amount Spent", 
                    size="Amount Spent", 
                    color= "Campaign Name", 
                    trendline="ols")
figure.show()

In [16]:
## Searches 
label = ["Total Searches from Control Campaign", 
         "Total Searches from Test Campaign"]
counts = [sum(control_group["Searches Received"]), 
          sum(test_group["Searches Received"])]
colors = ['lightblue','orange']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Searches')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
fig.show()

In [17]:
label = ["Website Clicks from Control Campaign", 
         "Website Clicks from Test Campaign"]
counts = [sum(control_group["Website Clicks"]), 
          sum(test_group["Website Clicks"])]
colors = ['gold','lightgreen']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Website Clicks')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
fig.show()

In [18]:
## Content Viewed
label = ["Content Viewed from Control Campaign", 
         "Content Viewed from Test Campaign"]
counts = [sum(control_group["Content Viewed"]), 
          sum(test_group["Content Viewed"])]
colors = ['pink','violet']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Content Viewed')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
fig.show()

In [20]:
## Added to cart
label = ["Products Added to Cart from Control Campaign", 
         "Products Added to Cart from Test Campaign"]
counts = [sum(control_group["Added to Cart"]), 
          sum(test_group["Added to Cart"])]
colors = ['grey','white']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Added to Cart')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
fig.show()

In [21]:
## Amount spent
label = ["Amount Spent in Control Campaign", 
         "Amount Spent in Test Campaign"]
counts = [sum(control_group["Amount Spent"]), 
          sum(test_group["Amount Spent"])]
colors = ['red','blue']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Amount Spent')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
fig.show()

In [22]:
## Purchases
label = ["Purchases Made by Control Campaign", 
         "Purchases Made by Test Campaign"]
counts = [sum(control_group["Purchases"]), 
          sum(test_group["Purchases"])]
colors = ['lightyellow','green']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Purchases')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
fig.show()