In [3]:
# Import Packages and Libraries
import itertools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots

!pip install statsmodels
import statsmodels.stats.api as sms
from scipy.stats import ttest_1samp,shapiro,levene,ttest_ind,mannwhitneyu, \
    pearsonr,spearmanr,kendalltau,f_oneway,kruskal
from statsmodels.stats.proportion import proportions_ztest



In [4]:
# Take a look at the datasets
df_control=pd.read_csv("control_group.csv",sep=";")
df_control.head()

Unnamed: 0,Campaign Name,Date,Spend [USD],# of Impressions,Reach,# of Website Clicks,# of Searches,# of View Content,# of Add to Cart,# of Purchase
0,Control Campaign,1.08.2019,2280,82702.0,56930.0,7016.0,2290.0,2159.0,1819.0,618.0
1,Control Campaign,2.08.2019,1757,121040.0,102513.0,8110.0,2033.0,1841.0,1219.0,511.0
2,Control Campaign,3.08.2019,2343,131711.0,110862.0,6508.0,1737.0,1549.0,1134.0,372.0
3,Control Campaign,4.08.2019,1940,72878.0,61235.0,3065.0,1042.0,982.0,1183.0,340.0
4,Control Campaign,5.08.2019,1835,,,,,,,


In [5]:
df_test=pd.read_csv("test_group.csv",sep=";")
df_test.head()

Unnamed: 0,Campaign Name,Date,Spend [USD],# of Impressions,Reach,# of Website Clicks,# of Searches,# of View Content,# of Add to Cart,# of Purchase
0,Test Campaign,1.08.2019,3008,39550,35820,3038,1946,1069,894,255
1,Test Campaign,2.08.2019,2542,100719,91236,4657,2359,1548,879,677
2,Test Campaign,3.08.2019,2365,70263,45198,7885,2572,2367,1268,578
3,Test Campaign,4.08.2019,2710,78451,25937,4216,2216,1437,566,340
4,Test Campaign,5.08.2019,2297,114295,95138,5863,2106,858,956,768


In [7]:
df_control.columns = ["Campaign Name", "Date", "Amount Spent",
                        "Number of Impressions", "Reach", "Website Clicks",
                        "Searches Received", "Content Viewed", "Added to Cart",
                        "Purchases"]

df_test.columns = ["Campaign Name", "Date", "Amount Spent",
                        "Number of Impressions", "Reach", "Website Clicks",
                        "Searches Received", "Content Viewed", "Added to Cart",
                        "Purchases"]

In [8]:
# Analyze Missing Value
df_control.isnull().sum()

Unnamed: 0,0
Campaign Name,0
Date,0
Amount Spent,0
Number of Impressions,1
Reach,1
Website Clicks,1
Searches Received,1
Content Viewed,1
Added to Cart,1
Purchases,1


In [9]:
df_test.isnull().sum()

Unnamed: 0,0
Campaign Name,0
Date,0
Amount Spent,0
Number of Impressions,0
Reach,0
Website Clicks,0
Searches Received,0
Content Viewed,0
Added to Cart,0
Purchases,0


In [10]:
# Fill missing values in each specified column with the mean of that column
df_control["Number of Impressions"].fillna(value=df_control["Number of Impressions"].mean(),inplace=True)
df_control["Reach"].fillna(value=df_control["Reach"].mean(),inplace=True)
df_control["Website Clicks"].fillna(value=df_control["Website Clicks"].mean(),inplace=True)
df_control["Searches Received"].fillna(value=df_control["Searches Received"].mean(),inplace=True)
df_control["Added to Cart"].fillna(value=df_control["Added to Cart"].mean(),inplace=True)
df_control["Purchases"].fillna(value=df_control["Purchases"].mean(),inplace=True)
df_control["Content Viewed"].fillna(value=df_control["Content Viewed"].mean(),inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_control["Number of Impressions"].fillna(value=df_control["Number of Impressions"].mean(),inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_control["Reach"].fillna(value=df_control["Reach"].mean(),inplace=True)
The behavior will change in pandas 3.0. This inplace

In [11]:
df_control.isnull().sum()

Unnamed: 0,0
Campaign Name,0
Date,0
Amount Spent,0
Number of Impressions,0
Reach,0
Website Clicks,0
Searches Received,0
Content Viewed,0
Added to Cart,0
Purchases,0


In [12]:
# Merge datasets
df=pd.concat([df_control, df_test], ignore_index=True)
df.head()

Unnamed: 0,Campaign Name,Date,Amount Spent,Number of Impressions,Reach,Website Clicks,Searches Received,Content Viewed,Added to Cart,Purchases
0,Control Campaign,1.08.2019,2280,82702.0,56930.0,7016.0,2290.0,2159.0,1819.0,618.0
1,Control Campaign,2.08.2019,1757,121040.0,102513.0,8110.0,2033.0,1841.0,1219.0,511.0
2,Control Campaign,3.08.2019,2343,131711.0,110862.0,6508.0,1737.0,1549.0,1134.0,372.0
3,Control Campaign,4.08.2019,1940,72878.0,61235.0,3065.0,1042.0,982.0,1183.0,340.0
4,Control Campaign,5.08.2019,1835,109559.758621,88844.931034,5320.793103,2221.310345,1943.793103,1300.0,522.793103


In [13]:
# Group the DataFrame by 'Campaign Name' and calculate the mean of specified columns for each group
df.groupby("Campaign Name").agg({
    "Purchases": "mean",  # Calculate the mean of 'Purchases'
    "Number of Impressions": "mean",  # Calculate the mean of 'Number of Impressions'
    "Website Clicks": "mean",  # Calculate the mean of 'Website Clicks'
    "Content Viewed": "mean",  # Calculate the mean of 'Content Viewed'
    "Added to Cart": "mean"  # Calculate the mean of 'Added to Cart'
})

Unnamed: 0_level_0,Purchases,Number of Impressions,Website Clicks,Content Viewed,Added to Cart
Campaign Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Control Campaign,522.793103,109559.758621,5320.793103,1943.793103,1300.0
Test Campaign,521.233333,74584.8,6032.333333,1858.0,881.533333


In [15]:
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Define the structure of the subplots (2 rows, 3 columns, all pie chart types)
specs = [[{'type':'domain'}, {'type':'domain'},{'type':'domain'}], [{'type':'domain'}, {'type':'domain'},{'type':'domain'}]]
# Create subplots with specified titles for each pie chart
fig = make_subplots(rows=2, cols=3, specs=specs, subplot_titles=['Number of Impressions', 'Website Clicks',"Content Viewed",
                                                                 "Added to Cart","Amount Spent","Reach"])

# Define the colors for the pie chart segments
marker_colors = ["#FF6347", "#4682B4"] # Tomato and SteelBlue

# Define the labels for the pie chart segments
labels=["Control Campaign","Test Campaign"]
# Calculate the sum of each metric for the control group
values= [sum(df_control["Number of Impressions"]),sum(df_test["Number of Impressions"])]
values1= [sum(df_control["Website Clicks"]),sum(df_test["Website Clicks"])]
values2= [sum(df_control["Content Viewed"]),sum(df_test["Content Viewed"])]
values3= [sum(df_control["Added to Cart"]),sum(df_test["Added to Cart"])]
values4= [sum(df_control["Amount Spent"]),sum(df_test["Amount Spent"])]
values5= [sum(df_control["Reach"]),sum(df_test["Reach"])]

# Add pie chart traces to the subplots
fig.add_trace(go.Pie(labels=labels, values=values,
                     marker_colors=marker_colors),1,1)

fig.add_trace(go.Pie(labels=labels, values=values1,
                     marker_colors=marker_colors),1, 2)

fig.add_trace(go.Pie(labels=labels, values=values2,
                     marker_colors=marker_colors),1, 3)

fig.add_trace(go.Pie(labels=labels, values=values3,
                     marker_colors=marker_colors),2, 1)

fig.add_trace(go.Pie(labels=labels, values=values4,
                     marker_colors=marker_colors),2, 2)

fig.add_trace(go.Pie(labels=labels, values=values5,
                     marker_colors=marker_colors),2, 3)

# Display the figure
fig.show()

In [16]:
# Perform Shapiro-Wilk test for normality on the 'Purchases' data for the Control Campaign
test_stat,pvalue=shapiro(df.loc[df["Campaign Name"]=="Control Campaign","Purchases"])
print("Test Stat =%.4f, p-value=%.4f" % (test_stat,pvalue))

# Interpret the results of the Shapiro-Wilk test
if pvalue < 0.05:  # You can adjust the significance level as needed
    print("The Test Campaign significantly outperforms the Control Campaign.")
else:
    print("There is no significant difference between the Test and Control Campaigns.")

Test Stat =0.9433, p-value=0.1114
There is no significant difference between the Test and Control Campaigns.


In [17]:
# Perform Shapiro-Wilk test for normality on the 'Purchases' data for the Test Campaign
test_stat,pvalue=shapiro(df.loc[df["Campaign Name"]=="Test Campaign","Purchases"])
print("Test Stat =%.4f, p-value=%.4f" % (test_stat,pvalue))

# Interpret the results of the Shapiro-Wilk test for normality
if pvalue < 0.05:  # You can adjust the significance level as needed
    print("The data does not follow a normal distribution.")
else:
    print("The data follows a normal distribution (or there is no significant evidence against normality).")

Test Stat =0.9182, p-value=0.0241
The data does not follow a normal distribution.


In [18]:
test_stat,pvalue=ttest_ind(df.loc[df["Campaign Name"]=="Control Campaign","Purchases"],
                        df.loc[df["Campaign Name"]=="Test Campaign","Purchases"],equal_var=True)

print("Test Stat =%.4f, p-value=%.4f" % (test_stat,pvalue))

if pvalue < 0.05:  # You can adjust the significance level as needed
    print("The Test Campaign significantly outperforms the Control Campaign.")
else:
    print("There is no significant difference between the Test and Control Campaigns.")
    print("Null Hypothesis is not rejected")

Test Stat =0.0307, p-value=0.9756
There is no significant difference between the Test and Control Campaigns.
Null Hypothesis is not rejected


Therefore, based on the standard significance level of 0.05, we do not have enough evidence to reject the null hypothesis. This means we conclude that there is no statistically significant difference in the average number of purchases between the Control Campaign and the Test Campaign.

To gain a clearer understanding of any potential differences, consider increasing the observation period or sample size for the A/B test. Additionally, analyzing other relevant key performance indicators (KPIs) such as Click-Through Rate (CTR) and Conversion Rate could provide further insights.

**Click-Through Rate (CTR)** is the percentage of individuals viewing (impressions) a web page who view and then click on a specific advertisement that appears on that page. It measures how successful an ad has been in capturing users' attention. The higher the click-through rate, the more successful the ad has been in generating interest.

**Conversion Rate** is the ratio of users who take a desired action (e.g., making a purchase) to the total number of users who clicked on the ad.

**CPC (Cost Per Click)** is a metric that determines how much advertisers pay for the ads they place on websites or social media, based on the number of clicks the ad receives. CPC is important for marketers to consider, since it measures the price for a brand's paid advertising campaigns.

**Return On Investment (ROI)** provides an overview of the effectiveness of the advertising campaign.

In [19]:
# Calculate Click-Through Rate (CTR)
df['CTR'] = (df['Website Clicks'] / df['Number of Impressions']) * 100
# Calculate Conversion Rate
df['Conversion Rate'] = (df['Purchases'] / df['Website Clicks']) * 100
# Calculate Cost Per Click (CPC)
df['CPC'] = df['Amount Spent'] / df['Website Clicks']
# Calculate Return On Investment (ROI)
df['ROI'] = ((df['Purchases'] - df['Amount Spent']) / df['Amount Spent']) * 100

# Display the first few rows with the new metrics
df[['CTR', 'Conversion Rate','CPC','ROI']].head()

Unnamed: 0,CTR,Conversion Rate,CPC,ROI
0,8.483471,8.808438,0.324971,-72.894737
1,6.700264,6.300863,0.216646,-70.916335
2,4.941121,5.716042,0.360018,-84.122919
3,4.205659,11.092985,0.632953,-82.474227
4,4.856521,9.825473,0.344873,-71.509913


In [20]:
df_KPIs=df.groupby("Campaign Name").agg({"CTR":"mean","Conversion Rate":"mean","CPC":"mean","ROI":"mean"})
df_KPIs

Unnamed: 0_level_0,CTR,Conversion Rate,CPC,ROI
Campaign Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Control Campaign,5.087893,11.422146,0.489907,-76.619613
Test Campaign,10.24226,9.231182,0.468718,-79.342253


In [21]:
from scipy import stats

# Calculate CTR for the test and control groups
df_test['CTR'] = df_test['Website Clicks'] / df_test['Number of Impressions']
df_control['CTR'] = df_control['Website Clicks'] / df_control['Number of Impressions']

# Perform an independent samples t-test on the CTR
t_stat, p_value = stats.ttest_ind(df_control['CTR'],df_test['CTR'] )
print("p value : {}".format(p_value))

# Interpret the results of the t-test based on the p-value
if p_value < 0.05:  # You can adjust the significance level (alpha) as needed
    print("The Test Campaign significantly outperforms the Control Campaign.")
    print("There is significant differences between Click through rate results for two campaigns")
else:
    print("There is no significant difference between the Test and Control Campaigns.")

p value : 0.00018398526312426124
The Test Campaign significantly outperforms the Control Campaign.
There is significant differences between Click through rate results for two campaigns


In [22]:
from scipy import stats

df_control['Conversion Rate'] = df_control['Purchases'] /df_control['Website Clicks']
df_test['Conversion Rate'] = df_test['Purchases'] / df_test['Website Clicks']

t_stat, p_value = stats.ttest_ind(df_test['Conversion Rate'], df_control['Conversion Rate'])
print("p value : {}".format(p_value))

if p_value < 0.05:
    print("The Test Campaign significantly outperforms the Control Campaign.")
    print("There is no significant difference between Click through rate for two campaigns")
else:
    print("There is no significant difference between the Test and Control Campaigns.")

p value : 0.14214717143258024
There is no significant difference between the Test and Control Campaigns.


In [23]:
from scipy import stats

df_control['CPC'] = df_control['Amount Spent'] /df_control['Website Clicks']
df_test['CPC'] = df_test['Amount Spent'] / df_test['Website Clicks']

t_stat, p_value = stats.ttest_ind(df_test['Conversion Rate'], df_control['Conversion Rate'])
print("p value : {}".format(p_value))

if p_value < 0.05:
    print("The Test Campaign significantly outperforms the Control Campaign.")
    print("There is no significant difference between Click through rate for two campaigns")
else:
    print("There is no significant difference between the Test and Control Campaigns.")

p value : 0.14214717143258024
There is no significant difference between the Test and Control Campaigns.


While the overall purchase numbers did not show a statistically significant difference between the two campaigns, analyzing other key metrics provides further insight:

*   **Conversion Rate:** Both campaigns exhibit similar conversion rates, suggesting that once a user clicks on an ad, they are equally likely to make a purchase regardless of the campaign.
*   **Click-Through Rate (CTR):** The Test Campaign demonstrates a higher CTR compared to the Control Campaign. This indicates that the ads in the Test Campaign were more effective at attracting user attention and generating clicks.
*   **Cost Per Click (CPC):** The Test Campaign shows a lower CPC. A lower CPC means that the test campaign is more cost-efficient in generating clicks, potentially leading to more leads within the same budget and a better potential ROI.

In summary, while the Test Campaign didn't lead to a statistically significant increase in total purchases during this period, it was more effective at generating clicks and did so at a lower cost per click. This suggests that the Test Campaign was more efficient in driving traffic to the website. Further analysis or a longer testing period might reveal a significant impact on purchases, especially given the improved CTR and CPC.