The dataset we are using here contains two data files about two marketing campaigns (Control Campaign and Test Campaign).

In [1]:
import pandas as pd
import datetime
from datetime import date, timedelta
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.templates.default = "plotly_white"

control_data = pd.read_csv("/kaggle/input/example-dataset-for-ab-test/control_group.csv", sep =';')
test_data = pd.read_csv("/kaggle/input/example-dataset-for-ab-test/test_group.csv", sep =';')

In [2]:
control_data.head(2)

Unnamed: 0,Campaign Name,Date,Spend [USD],# of Impressions,Reach,# of Website Clicks,# of Searches,# of View Content,# of Add to Cart,# of Purchase
0,Control Campaign,1.08.2019,2280,82702.0,56930.0,7016.0,2290.0,2159.0,1819.0,618.0
1,Control Campaign,2.08.2019,1757,121040.0,102513.0,8110.0,2033.0,1841.0,1219.0,511.0


In [3]:
test_data.head(2)

Unnamed: 0,Campaign Name,Date,Spend [USD],# of Impressions,Reach,# of Website Clicks,# of Searches,# of View Content,# of Add to Cart,# of Purchase
0,Test Campaign,1.08.2019,3008,39550,35820,3038,1946,1069,894,255
1,Test Campaign,2.08.2019,2542,100719,91236,4657,2359,1548,879,677


prepare the Data

* Update column name 
* Search for null value
* Replace null value with mean value

In [4]:
control_data.columns = ["Campaign Name", "Date", "Amount Spent", 
                        "Number of Impressions", "Reach", "Website Clicks", 
                        "Searches Received", "Content Viewed", "Added to Cart",
                        "Purchases"]

test_data.columns = ["Campaign Name", "Date", "Amount Spent", 
                        "Number of Impressions", "Reach", "Website Clicks", 
                        "Searches Received", "Content Viewed", "Added to Cart",
                        "Purchases"]

In [5]:
control_data.head(2)

Unnamed: 0,Campaign Name,Date,Amount Spent,Number of Impressions,Reach,Website Clicks,Searches Received,Content Viewed,Added to Cart,Purchases
0,Control Campaign,1.08.2019,2280,82702.0,56930.0,7016.0,2290.0,2159.0,1819.0,618.0
1,Control Campaign,2.08.2019,1757,121040.0,102513.0,8110.0,2033.0,1841.0,1219.0,511.0


In [6]:
test_data.head(2)

Unnamed: 0,Campaign Name,Date,Amount Spent,Number of Impressions,Reach,Website Clicks,Searches Received,Content Viewed,Added to Cart,Purchases
0,Test Campaign,1.08.2019,3008,39550,35820,3038,1946,1069,894,255
1,Test Campaign,2.08.2019,2542,100719,91236,4657,2359,1548,879,677


In [7]:
control_data.isnull().sum()

Campaign Name            0
Date                     0
Amount Spent             0
Number of Impressions    1
Reach                    1
Website Clicks           1
Searches Received        1
Content Viewed           1
Added to Cart            1
Purchases                1
dtype: int64

In [8]:
test_data.isnull().sum()

Campaign Name            0
Date                     0
Amount Spent             0
Number of Impressions    0
Reach                    0
Website Clicks           0
Searches Received        0
Content Viewed           0
Added to Cart            0
Purchases                0
dtype: int64

In [9]:
control_data["Number of Impressions"].fillna(value=control_data["Number of Impressions"].mean(), 
                                             inplace=True)
control_data["Reach"].fillna(value=control_data["Reach"].mean(), 
                             inplace=True)
control_data["Website Clicks"].fillna(value=control_data["Website Clicks"].mean(), 
                                      inplace=True)
control_data["Searches Received"].fillna(value=control_data["Searches Received"].mean(), 
                                         inplace=True)
control_data["Content Viewed"].fillna(value=control_data["Content Viewed"].mean(), 
                                      inplace=True)
control_data["Added to Cart"].fillna(value=control_data["Added to Cart"].mean(), 
                                     inplace=True)
control_data["Purchases"].fillna(value=control_data["Purchases"].mean(), 
                                 inplace=True)

In [10]:
control_data.isnull().sum()

Campaign Name            0
Date                     0
Amount Spent             0
Number of Impressions    0
Reach                    0
Website Clicks           0
Searches Received        0
Content Viewed           0
Added to Cart            0
Purchases                0
dtype: int64

In [11]:
control_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Campaign Name          30 non-null     object 
 1   Date                   30 non-null     object 
 2   Amount Spent           30 non-null     int64  
 3   Number of Impressions  30 non-null     float64
 4   Reach                  30 non-null     float64
 5   Website Clicks         30 non-null     float64
 6   Searches Received      30 non-null     float64
 7   Content Viewed         30 non-null     float64
 8   Added to Cart          30 non-null     float64
 9   Purchases              30 non-null     float64
dtypes: float64(7), int64(1), object(2)
memory usage: 2.5+ KB


In [12]:
control_data.describe()

Unnamed: 0,Amount Spent,Number of Impressions,Reach,Website Clicks,Searches Received,Content Viewed,Added to Cart,Purchases
count,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0
mean,2288.433333,109559.758621,88844.931034,5320.793103,2221.310345,1943.793103,1300.0,522.793103
std,367.334451,21311.695472,21452.627592,1726.803732,851.025795,764.021907,400.371207,181.810508
min,1757.0,71274.0,42859.0,2277.0,1001.0,848.0,442.0,222.0
25%,1945.5,95191.25,75300.25,4122.25,1629.25,1249.0,942.5,375.5
50%,2299.5,112368.0,91418.0,5272.396552,2340.0,1979.5,1319.5,506.0
75%,2532.0,121259.0,101958.75,6609.5,2655.0,2360.5,1638.0,663.25
max,3083.0,145248.0,127852.0,8137.0,4891.0,4219.0,1913.0,800.0


In [13]:
test_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Campaign Name          30 non-null     object
 1   Date                   30 non-null     object
 2   Amount Spent           30 non-null     int64 
 3   Number of Impressions  30 non-null     int64 
 4   Reach                  30 non-null     int64 
 5   Website Clicks         30 non-null     int64 
 6   Searches Received      30 non-null     int64 
 7   Content Viewed         30 non-null     int64 
 8   Added to Cart          30 non-null     int64 
 9   Purchases              30 non-null     int64 
dtypes: int64(8), object(2)
memory usage: 2.5+ KB


In [14]:
test_data.describe()

Unnamed: 0,Amount Spent,Number of Impressions,Reach,Website Clicks,Searches Received,Content Viewed,Added to Cart,Purchases
count,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0
mean,2563.066667,74584.8,53491.566667,6032.333333,2418.966667,1858.0,881.533333,521.233333
std,348.687681,32121.377422,28795.775752,1708.567263,388.742312,597.654669,347.584248,211.047745
min,1968.0,22521.0,10598.0,3038.0,1854.0,858.0,278.0,238.0
25%,2324.5,47541.25,31516.25,4407.0,2043.0,1320.0,582.5,298.0
50%,2584.0,68853.5,44219.5,6242.5,2395.5,1881.0,974.0,500.0
75%,2836.25,99500.0,78778.75,7604.75,2801.25,2412.0,1148.5,701.0
max,3112.0,133771.0,109834.0,8264.0,2978.0,2801.0,1391.0,890.0


Merge both Dataset

In [15]:
ab_data = control_data.merge(test_data, 
                             how="outer").sort_values(["Date"])
ab_data = ab_data.reset_index(drop=True)
ab_data.head()



Unnamed: 0,Campaign Name,Date,Amount Spent,Number of Impressions,Reach,Website Clicks,Searches Received,Content Viewed,Added to Cart,Purchases
0,Control Campaign,1.08.2019,2280,82702.0,56930.0,7016.0,2290.0,2159.0,1819.0,618.0
1,Test Campaign,1.08.2019,3008,39550.0,35820.0,3038.0,1946.0,1069.0,894.0,255.0
2,Test Campaign,10.08.2019,2790,95054.0,79632.0,8125.0,2312.0,1804.0,424.0,275.0
3,Control Campaign,10.08.2019,2149,117624.0,91257.0,2277.0,2475.0,1984.0,1629.0,734.0
4,Test Campaign,11.08.2019,2420,83633.0,71286.0,3750.0,2893.0,2617.0,1075.0,668.0


have a look if the dataset has an equal number of samples about both campaigns

In [16]:
ab_data["Campaign Name"].value_counts()

Control Campaign    30
Test Campaign       30
Name: Campaign Name, dtype: int64

A/B Testing to Find the Best Marketing Strategy

In [17]:
figure = px.scatter(data_frame = ab_data, 
                    x="Number of Impressions",
                    y="Amount Spent", 
                    size="Amount Spent", 
                    color= "Campaign Name", 
                    trendline="ols")
figure.show()

# Control Campaign shows more impressions according to the amount spent on both campaigns

Number of searches performed on the website from both campaigns

In [18]:
label = ["Total Searches from Control Campaign", 
         "Total Searches from Test Campaign"]
counts = [sum(control_data["Searches Received"]), 
          sum(test_data["Searches Received"])]
colors = ['gold','lightgreen']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Searches')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
fig.show()

# Audience of the control campaign viewed more content than the test campaign. 
# Although there is not much difference, as the website clicks of the control campaign were low, its engagement on the website is higher than the test campaign


In [19]:
# the number of products added to the cart from both campaigns

label = ["Products Added to Cart from Control Campaign", 
         "Products Added to Cart from Test Campaign"]
counts = [sum(control_data["Added to Cart"]), 
          sum(test_data["Added to Cart"])]
colors = ['gold','lightgreen']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Added to Cart')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
fig.show()

# Despite low website clicks more products were added to the cart from the control campaign

In [20]:
# let’s have a look at the amount spent on both campaigns

label = ["Amount Spent in Control Campaign", 
         "Amount Spent in Test Campaign"]
counts = [sum(control_data["Amount Spent"]), 
          sum(test_data["Amount Spent"])]
colors = ['gold','lightgreen']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Amount Spent')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
fig.show()

#The amount spent on the test campaign is higher than the control campaign. 
# But as we can see that the control campaign resulted in more content views and more products in the cart, the control campaign is more efficient than the test campaign.

In [21]:
# let’s have a look at the purchases made by both campaigns

label = ["Purchases Made by Control Campaign", 
         "Purchases Made by Test Campaign"]
counts = [sum(control_data["Purchases"]), 
          sum(test_data["Purchases"])]
colors = ['gold','lightgreen']
fig = go.Figure(data=[go.Pie(labels=label, values=counts)])
fig.update_layout(title_text='Control Vs Test: Purchases')
fig.update_traces(hoverinfo='label+percent', textinfo='value', 
                  textfont_size=30,
                  marker=dict(colors=colors, 
                              line=dict(color='black', width=3)))
fig.show()

#There’s only a difference of around 1% in the purchases made from both ad campaigns. 
# As the Control campaign resulted in more sales in less amount spent on marketing, the control campaign wins here!

In [22]:
# Let’s analyze some metrics to find which ad campaign converts more. 
# Let's look at the relationship between the number of website clicks and content viewed from both campaigns

figure = px.scatter(data_frame = ab_data, 
                    x="Content Viewed",
                    y="Website Clicks", 
                    size="Website Clicks", 
                    color= "Campaign Name", 
                    trendline="ols")
figure.show()

# The website clicks are higher in the test campaign, but the engagement from website clicks is higher in the control campaign

In [23]:
# analyze the relationship between the amount of content viewed and the number of products added to the cart

figure = px.scatter(data_frame = ab_data, 
                    x="Added to Cart",
                    y="Content Viewed", 
                    size="Added to Cart", 
                    color= "Campaign Name", 
                    trendline="ols")
figure.show()

# Control Campaign wins again

In [24]:
# the relationship between the number of products added to the cart and the number of sales 

figure = px.scatter(data_frame = ab_data, 
                    x="Purchases",
                    y="Added to Cart", 
                    size="Purchases", 
                    color= "Campaign Name", 
                    trendline="ols")
figure.show()

# The conversation rate of the test campaign is higher.

According to the results of the above A/B tests, the control campaign generated greater revenue and visitor interaction. The control campaign's products were viewed more often, which led to more items being added to shopping carts and more sales. However, the test campaign had a greater conversation rate for items in the shopping basket. According to the products that were viewed and added to the cart, the test campaign generated greater purchases. Additionally, the control campaign generates overall higher sales. As a result, the Control campaign can be used to market a variety of products to a larger audience while the Test campaign may be used to promote a particular product to a specific group of people.