In [358]:
cd /Users/karolinegriesbach/Documents/Innkeepr/Git/evaluation-and-execution-scripts/

In [359]:
import os
import ast
import logging
import numpy as np
import pandas as pd
import awswrangler as wr
import matplotlib.pyplot as plt
from datetime import timedelta
import seaborn as sns
from scipy.stats import ttest_ind
from general_functions.return_account_ids import return_account_ids
from general_functions.sanitize_accout_name import sanitize_account_name
from general_functions.constants import return_api_url
from general_functions.call_api_with_account_id import call_api_with_accountId, send_to_innkeepr_api_paginated

In [360]:
customer = "Asambeauty"
customer_dict = {
    "to teach":{
        "brand_string":"Brand Bidding", 
        "conversionActionId":"68cbfa5577b55cfe740fbc8b"
        },
    "Kfzteile24": {
        "adAccountId":"2252905905",
        "conversionActionId":"68820a2deafeb4bf3b501beb"
    },
    "Asambeauty": {
        "brand_string":"01|02",
        "conversionActionId":"682ed8362fc068cde38c3dff",
        "campaign_insight": ["DE_02_Pflegelinie_SUN","DE_02_Pflegelinie_SUN Innkeepr3.0","DE_02_Pflegelinien_Magic_Finish_Phrase","DE_02_Pflegelinien_Magic_Finish_Phrase Innkeepr3.0"],
        "alpha_values": {'2025-10-09': '40052.350000',
 '2025-10-10': '46.859553',
 '2025-10-11': '18.947135',
 '2025-10-12': '22.313289',
 '2025-10-13': '7.009723',
 '2025-10-14': '15.199885',
 '2025-10-15': '17.851920',
 '2025-10-16': '8.264985',
 '2025-10-17': '10.702888',
 '2025-10-18': '13.881490',
 '2025-10-19': '67.045563',
 '2025-10-20': '11.733726',
 '2025-10-21': '12.912308',
 '2025-10-22': '12.353640',
 '2025-10-23': '7.661739',
 '2025-10-24': '6.540622',
 '2025-10-25': '12.677954',
 '2025-10-26': '7.136677',
 '2025-10-27': '3.280932',
 '2025-10-28': '-1.969261',
 '2025-10-29': '1.624777',
 '2025-10-30': '-4.700424',
 '2025-10-31': '-10.952207',
 '2025-11-01': '-8.000534'}
    },
    "LILLYDOO": {
        "generic_string":"generic",
        "conversionActionId":"68c2d9007bd2ec4485bb98ed",
        "alpha_values": "x"
    }
}
# Output folder for the selected customer (all cached CSVs and figures go here).
customer_sanitized = sanitize_account_name(customer)
path_save = f"/Users/karolinegriesbach/Documents/Innkeepr/Git/evaluation-and-execution-scripts/SprintStories/EN-2874-brand-vs-generic/data/{customer_sanitized}/"
# Analysis window, both ends inclusive, as YYYYMMDD strings (used further down to build the S3 paths).
start_date = "20251015"
end_date = "20251101"
date_range = pd.date_range(start=start_date, end=end_date, freq="D").strftime("%Y%m%d").tolist()
# exist_ok makes this a no-op when the folder is already there (no separate existence check needed).
os.makedirs(path_save, exist_ok=True)
url = return_api_url()
print(f"url = {url}")
# Pick the id of the account whose name equals `customer` exactly.
matching_account_ids = [acc["id"] for acc in return_account_ids() if acc["name"] == customer]
if not matching_account_ids:
    # Without this check an unknown customer name only surfaces as a bare IndexError.
    raise ValueError(f"No account named {customer!r} was returned by return_account_ids()")
account_id = matching_account_ids[0]

# Get Data

## Load Targeting History of Conversion Actions

In [361]:
# Load the targeting history of the configured conversion action.
# A local CSV serves as cache; on a cache miss one parquet file per day is read from S3.
conversion_action_id = customer_dict[customer]["conversionActionId"]
data_file_path = f"{path_save}targeting_history_{customer}_{conversion_action_id}_{start_date}_{end_date}.csv"
try:
    df = pd.read_csv(data_file_path)
except FileNotFoundError:
    print("File not found, creating new DataFrame.")
    # Collect the daily frames and concatenate once instead of re-copying df on every iteration.
    daily_frames = []
    for date in date_range:
        try:
            print(f"Reading data for {date}")
            temp = wr.s3.read_parquet(f"s3://{account_id}/targeting.history/{date}/ga_conversion_update_{conversion_action_id}.parquet")
        except wr.exceptions.NoFilesFound:
            print(f". No data for {date}")
            continue
        temp["bucket_date"] = date
        daily_frames.append(temp)
    if not daily_frames:
        # Stop before an empty cache file is written; it would break this and every later run.
        raise RuntimeError(
            f"No targeting history found for {customer} between {start_date} and {end_date}"
        )
    df = pd.concat(daily_frames)
    df.to_csv(data_file_path, index=False)
df = df[["created","profile","anonymousId","treatment","conv_prob","session.date","session","gclid_externalIds","properties.revenue","value_based_conversion_value","conv_created"]]
# NOTE(review): explode only splits list-like cells. Cells read back from the CSV cache are
# plain strings, so a fresh S3 load and a cached load may yield different row counts here -
# confirm which of the two is intended.
df = df.explode("gclid_externalIds")
df

In [362]:
df.columns

In [363]:
def return_gclid_id(x):
    """Return the ``"id"`` of the first gclid entry in ``x``, or ``None``.

    Parameters
    ----------
    x : str, list, tuple, dict or missing value
        Either the string representation of a list of dicts (what a CSV
        round trip produces), an already parsed list/tuple of dicts, or a
        single dict.

    Returns
    -------
    The value stored under ``"id"`` of the first entry, or ``None`` when
    ``x`` is missing (``None``/NaN), cannot be parsed, is empty, or the
    entry has no ``"id"`` key.
    """
    if isinstance(x, str):
        try:
            x = ast.literal_eval(x)
        except (ValueError, SyntaxError):
            # e.g. the literal text "nan" or a truncated cell
            return None
    if isinstance(x, dict):
        return x.get("id")
    if isinstance(x, (list, tuple)) and len(x) > 0:
        first_entry = x[0]
        if isinstance(first_entry, dict):
            return first_entry.get("id")
    # NaN/None, empty containers and any other type carry no gclid id.
    return None

In [364]:
# Extract the first gclid id of every row and keep it in pandas' nullable string dtype.
gclid_ids = df["gclid_externalIds"].map(return_gclid_id)
df["externalId"] = gclid_ids.astype("string")

## Load Signals

In [365]:
# Fetch the signals that belong to every distinct, non-null externalId found above
# and flatten the nested JSON records into columns.
signals_endpoint = f"{url}/signals/query"
external_id_filter = {"externalId": df["externalId"].dropna().unique().tolist()}
signals = pd.json_normalize(
    send_to_innkeepr_api_paginated(signals_endpoint, account_id, external_id_filter, logging)
)
signals

In [366]:
# Keep only the signals that count as brand traffic for this customer.
# The rule is chosen by the first of these keys present in customer_dict[customer]:
#   adAccountId    -> campaign resource name contains the ad account id
#   brand_string   -> campaign name matches the pattern (a regex, so "01|02" means 01 or 02)
#   generic_string -> campaign name does NOT match the pattern
customer_config = customer_dict[customer]
print(f"Found {len(signals)} signals")
if "adAccountId" in customer_config:
    ad_account_id = customer_config["adAccountId"]
    # na=False treats missing names as "no match" and keeps the mask a plain boolean Series.
    signals = signals[signals["relates_to.campaign.resourceName"].str.contains(ad_account_id, na=False)]
    print(f"Found {len(signals)} signals with adAccountId {ad_account_id}")
elif "brand_string" in customer_config:
    brand_string = customer_config["brand_string"]
    signals = signals[signals["relates_to.campaign.name"].str.contains(brand_string, na=False)]
    print(f"Found {len(signals)} signals with brand_string {brand_string}")
elif "generic_string" in customer_config:
    generic_string = customer_config["generic_string"]
    # Rows without a campaign name do not match the generic pattern and are therefore kept.
    signals = signals[~signals["relates_to.campaign.name"].str.contains(generic_string, na=False)]
    print(f"Found {len(signals)} signals without generic_string {generic_string}")
else:
    raise ValueError("adAccountId, brand_string or generic_string not found in customer_dict")


In [367]:
signals["relates_to.campaign.name"].sort_values().unique()

## Merge Targeting History and Signal Id

In [368]:
# Flag every targeting-history row whose externalId belongs to one of the brand signals kept above.
# assign() builds a new frame, so the filtered `signals` slice is not written to in place.
signals = signals.assign(
    **{"externalId": signals["externalId"].astype("string"), "brand traffic": True}
)
# Exact duplicate lookup rows add no information but would multiply df rows in the left merge.
signal_lookup = signals[["externalId","relates_to.campaign.name","relates_to.adGroup.name", "brand traffic"]].drop_duplicates()
df = pd.merge(df, signal_lookup, on="externalId", how="left")
# Rows without a matching signal come back as NaN; turn the flag into a plain boolean.
df["brand traffic"] = df["brand traffic"].eq(True)
df.head()

In [369]:
# Restrict the comparison to "created" values that contain at least one brand-traffic row.
dates_with_brand_traffic = df[df["brand traffic"] == True]["created"].unique()
print(f"dates with brand traffic: {dates_with_brand_traffic}")
# .copy() gives an independent frame, so later column assignments on df do not
# write into a slice of the unfiltered data (SettingWithCopyWarning).
df = df[df["created"].isin(dates_with_brand_traffic)].copy()
print(f"Dates in data: {df['created'].unique()}")
df.shape

# Get Statistics

## t-test

In [370]:
# Two-sample t-test on conv_prob: rows flagged as brand traffic vs. all other rows.
# Missing probabilities are dropped first; with a NaN in either group ttest_ind returns
# NaN, and the check below would then silently report "not significant".
conv_prob_as_float = df["conv_prob"].astype("float")
group1 = conv_prob_as_float[df["brand traffic"] == True].dropna()
group2 = conv_prob_as_float[df["brand traffic"] == False].dropna()
t_statistic, p_value = ttest_ind(group1, group2)
print(f"t-statistic: {t_statistic}, p-value: {p_value}")
# `signficant` (sic) is read by the plotting cells below, so the name is kept.
if p_value < 0.05:
    print("Is significant different")
    signficant = True
else:
    print("Is not significant different")
    signficant = False

In [371]:
df["conv_prob"] = df["conv_prob"].astype("float")

In [372]:
df.groupby("brand traffic")["conv_prob"].describe().transpose()

In [373]:
# Histogram (with KDE) of conv_prob, coloured by the brand-traffic flag.
fig, ax1 = plt.subplots()
ax1.set_title(f"{customer}: brand vs. generic (signifcant = {signficant})")
sns.histplot(data=df, y="conv_prob", hue="brand traffic", kde=True, ax=ax1)
ax1.grid(True)
fig.tight_layout()
fig.savefig(f"{path_save}brand_vs_generic_histogram.png")

In [374]:
# Box plot of conv_prob, one box per value of the brand-traffic flag.
fig, ax1 = plt.subplots()
ax1.set_title(f"{customer}: brand vs. generic (signifcant = {signficant})")
sns.boxplot(data=df, y="conv_prob", hue="brand traffic", ax=ax1)
ax1.grid(True)
fig.savefig(f"{path_save}brand_vs_generic_boxplot.png")

In [375]:
# Split violin plot of conv_prob by the brand-traffic flag.
# catplot opens its own figure, so the pyplot calls below act on that figure.
violin_options = dict(kind="violin", bw_adjust=.5, cut=0, split=True)
sns.catplot(data=df, y="conv_prob", hue="brand traffic", **violin_options)
plt.title(f"{customer}: brand vs. generic (signifcant = {signficant})")
plt.grid(True)
plt.tight_layout()
plt.savefig(f"{path_save}brand_vs_generic_catplot.png")


# Historic Analysis
Data is merged via anonymousId rather than sessionId, so the conversion's created timestamp (`conv_created`) is needed to match rows.

## Load conversions

In [376]:
# Load all "checkout_completed" conversions for the session dates present in df.
# A local CSV serves as cache; on a cache miss the API is queried one day at a time.
session_list = df["session"].dropna().unique().tolist()
min_date = pd.to_datetime(df["session.date"].min()).strftime("%Y-%m-%d")
# max_date is one day past the latest session date.
max_date = (pd.to_datetime(df["session.date"].max()) + timedelta(days=1)).strftime("%Y-%m-%d")

path_conversions = f"{path_save}conversions_{min_date}_{max_date}.csv"
try:
    conversions = pd.read_csv(path_conversions)
except FileNotFoundError:
    print(f"session_list = {len(session_list)}")
    # Loop-local names: the YYYYMMDD `date_range` of the config cell and the
    # min_date/max_date computed above are left untouched by this loop.
    query_days = pd.date_range(start=min_date, end=max_date, freq="D").strftime("%Y-%m-%d").tolist()
    daily_conversions = []
    for iday, window_start in enumerate(query_days):
        # Every request spans [day, next day]; the last day is queried against itself.
        # NOTE(review): both bounds are inclusive, so a conversion created exactly on a
        # day boundary may be returned by two consecutive requests - confirm API semantics.
        window_end = query_days[min(iday + 1, len(query_days) - 1)]
        print(f"Reading data for {window_start} to {window_end}")
        temp = send_to_innkeepr_api_paginated(
            f"{url}/conversions/query",
            account_id,
            {
            "created": {
                "$gte": window_start,
                "$lte": window_end,
            },
            "name":["checkout_completed"]
            },
            logging
        )
        temp = pd.json_normalize(temp)
        print(f".. {temp.shape}")
        daily_conversions.append(temp)
    # One concat at the end instead of re-copying the growing frame on every iteration.
    conversions = pd.concat(daily_conversions)
    conversions.to_csv(path_conversions, index=False)
conversions["conv_revenue"] = conversions["properties.revenue"].astype("float")
conversions["session.date"] = pd.to_datetime(conversions["created"]).dt.date
# Keep a single signal source; .copy() so the column added below is written to an independent frame.
conversions = conversions[conversions["signal.source"]=="609ffc4578188e83a2bc2c2c"].copy()
print(f"conversions before filtering with gclids: {conversions.shape}")
anonymousId_with_gclid = (
        df[df["gclid_externalIds"].notnull()]["anonymousId"]
        .unique()
        .tolist()
    )
print(
        f"fill_non_glicds_with_none: anonymousId_with_gclid = {len(anonymousId_with_gclid)}"
    )
# NOTE(review): no rows are removed between the two shape prints; the gclid information
# is only added as a boolean column.
conversions["profiles_for_smart_bidding"] = conversions["anonymousId"].isin(anonymousId_with_gclid).to_numpy()
print(f"conversions after filtering with gclids: {conversions.shape}")
conversions

In [377]:
conversions["signal.source"].value_counts(dropna=False)

In [378]:
# Revenue per session.date, counting every sessionId only once.
unique_session_conversions = conversions.drop_duplicates(subset=["sessionId"])
revenue_per_session_date = unique_session_conversions.groupby(by=["session.date"])["conv_revenue"].sum()
conv_sum_by_date = revenue_per_session_date.round(4).reset_index()
conv_sum_by_date

## Check sum

In [379]:
# Align dtypes before merging: the date key as string in both frames, the value columns as float.
for frame in (df, conv_sum_by_date):
    frame["session.date"] = frame["session.date"].astype("string")
for value_column in ("value_based_conversion_value", "properties.revenue"):
    df[value_column] = df[value_column].astype("float")

In [380]:
# Put the revenue sums of the targeting history next to the revenue sums of the conversions,
# matched on session.date, newest rows first.
history_sums = (
    df.groupby(by=["created","session.date"])[["properties.revenue","value_based_conversion_value"]]
    .sum()
    .reset_index()
)
check_sum = (
    history_sums
    .merge(conv_sum_by_date, how="left", on="session.date")
    .sort_values(by=["created", "session.date"], ascending=False)
)
check_sum

## Shift of conversion revenue brand vs. generic
- Nicht auf Session-Ebene nachvollziehbar, da über anonymousIds gemerged wird.

In [399]:
# Attach the per-session-date alpha from the customer config and remove it from the
# value-based conversion value.
df["value_based_conversion_value"]=df["value_based_conversion_value"].astype("float")
alpha_values = customer_dict[customer].get("alpha_values")
if not isinstance(alpha_values, dict):
    # Customers without a date -> alpha mapping cannot be analysed in this section.
    raise ValueError(f"customer_dict[{customer!r}]['alpha_values'] must be a dict of session date -> alpha")
print(alpha_values.keys())
def return_alpha(x, alpha_values):
    """Return the alpha stored for session date ``x``, or ``None`` if ``x`` is not a key of ``alpha_values``."""
    if x not in alpha_values.keys():
        return None
    return alpha_values[x]
# to_numeric converts the configured strings to floats and the None of unknown dates to NaN
# (float(None) would raise a TypeError), so those rows can be dropped right below.
df["alpha"] = pd.to_numeric(df["session.date"].apply(lambda x: return_alpha(x, alpha_values)))
# .copy() so the new column is written to an independent frame.
df = df.dropna(subset=["alpha"]).copy()
df["value_based_conversion_value_witout_alpha"] = df["value_based_conversion_value"] - df["alpha"]
df

In [402]:
# Sum of value_based_conversion_value per created / session.date / brand-traffic combination.
shift_keys = ["created","session.date","brand traffic"]
conv_shift = df.groupby(by=shift_keys, as_index=False)[["value_based_conversion_value"]].sum()
conv_shift

In [403]:
# Mark conversions whose anonymousId appears in a brand-traffic row of df, sum their revenue
# per flag and session.date (each sessionId once), and attach that sum to conv_shift.
brand_anonymous_ids = df[df["brand traffic"] == True]["anonymousId"].unique()
conversions["brand traffic"] = conversions["anonymousId"].isin(brand_anonymous_ids).to_numpy()
revenue_by_flag_and_date = (
    conversions.drop_duplicates(subset=["sessionId"])
    .groupby(by=["brand traffic","session.date"])["conv_revenue"]
    .sum()
    .round(4)
)
conv_sum_by_date_campaign_type = revenue_by_flag_and_date.reset_index()
# Both merge keys must share the string dtype.
conv_sum_by_date_campaign_type["session.date"] = conv_sum_by_date_campaign_type["session.date"].astype("string")
conv_shift["session.date"] = conv_shift["session.date"].astype("string")
conv_shift = (
    conv_shift
    .merge(conv_sum_by_date_campaign_type, how="left", on=["session.date","brand traffic"])
    .sort_values(by=["created","session.date","brand traffic"])
    .reset_index(drop=True)
)
conv_shift

In [404]:
# Absolute difference, ratio and percentage change of the value-based conversion value
# relative to conv_revenue.
value_based = conv_shift["value_based_conversion_value"]
revenue = conv_shift["conv_revenue"]
conv_shift["diff"] = value_based - revenue
conv_shift["change"] = value_based / revenue
conv_shift["change_in_perc"] = (conv_shift["change"] - 1)*100
conv_shift

In [405]:
# Percentage change of the conversion value per session.date, one line per brand-traffic flag.
fig, ax1 = plt.subplots()
ax1.set_title(f"{customer}: brand vs. generic revenue change")
sns.lineplot(data=conv_shift, x="session.date", y="change_in_perc", hue="brand traffic", ax=ax1)
ax1.grid(True)
plt.xticks(rotation=90)
# tight_layout runs last so the rotated tick labels are part of the layout
# and are not cut off in the saved PNG.
fig.tight_layout()
fig.savefig(f"{path_save}brand_vs_generic_change_in_perc_in_revenue.png")

In [406]:
# conv_prob per session.date, one line per brand-traffic flag.
fig, ax1 = plt.subplots()
# The title names what is plotted here (conv_prob); the previous text was copied from the revenue plot.
ax1.set_title(f"{customer}: brand vs. generic conv_prob by session date")
sns.lineplot(data=df, x="session.date", y="conv_prob", hue="brand traffic", ax=ax1)
ax1.grid(True)
plt.xticks(rotation=90)
# tight_layout runs last so the rotated tick labels are part of the layout
# and are not cut off in the saved PNG.
fig.tight_layout()
fig.savefig(f"{path_save}brand_vs_generic_conv_prob_by_session_date.png")

In [407]:
# Percentage change of the conversion value per "created" value, one line per brand-traffic flag.
fig, ax1 = plt.subplots()
ax1.set_title(f"{customer}: brand vs. generic revenue change")
sns.lineplot(data=conv_shift, x="created", y="change_in_perc", hue="brand traffic", ax=ax1)
ax1.grid(True)
plt.xticks(rotation=90)
# tight_layout runs last so the rotated tick labels are part of the layout
# and are not cut off in the saved PNG.
fig.tight_layout()
fig.savefig(f"{path_save}brand_vs_generic_change_in_perc_in_revenue_by_created.png")

In [408]:
# Number of rows per "created" value, one column per value of the brand-traffic flag.
# The pivot reads the counts from the column named "count" that reset_index produces here.
count_brand_traffic = (
    df.groupby("created")["brand traffic"]
    .value_counts(dropna=False)
    .reset_index()
    .pivot(index="created", columns="brand traffic", values="count")
    .reset_index()
)
count_brand_traffic

## Get change for conversion value without alpha

In [410]:
df

In [423]:
# Percentage change of the alpha-free conversion value relative to the revenue.
revenue = df["properties.revenue"]
relative_change = ((df["value_based_conversion_value_witout_alpha"] / revenue) - 1)*100
# Rows with a revenue of exactly 0 get a flat 100 instead of the ratio.
df["change_in_perc_wo_alpha"] = np.where(revenue==0, 100, relative_change)
df[["change_in_perc_wo_alpha","value_based_conversion_value_witout_alpha","properties.revenue","value_based_conversion_value"]]

In [417]:
df.groupby("brand traffic")["change_in_perc_wo_alpha"].describe().transpose()

In [420]:
df.groupby(by=["brand traffic","session.date"])["change_in_perc_wo_alpha"].describe()

In [422]:
# Percentage change without alpha per session.date, one line per brand-traffic flag,
# with the y axis clipped to [-100, 100].
fig, ax1 = plt.subplots()
ax1.set_title(f"{customer}: brand vs. generic revenue change without alpha")
sns.lineplot(data=df, x="session.date", y="change_in_perc_wo_alpha", hue="brand traffic", ax=ax1)
ax1.grid(True)
plt.xticks(rotation=90)
plt.ylim(-100, 100)
# tight_layout runs last so the rotated tick labels are part of the layout
# and are not cut off in the saved PNG.
fig.tight_layout()
fig.savefig(f"{path_save}brand_vs_generic_change_in_perc_in_revenue_without_alpha.png")

## Anpassung simulieren

In [None]:
# divide in percentiles
# prob_stats = df["conv_prob"].describe()
# top75 = prob_stats["75%"]
# top50 = prob_stats["50%"]
# df["category"] = np.where(df["conv_prob"] >= top75, "Top 25%", np.where(df["conv_prob"] >= top50, "Middle 25%", "Bottom 50%"))
# temp = df.groupby("brand traffic")["category"].value_counts(dropna=False).reset_index()
# temp = temp.pivot(index="brand traffic", columns="category", values="count").reset_index()
# temp["percentage_bottom"] = temp["Bottom 50%"] / (temp["Bottom 50%"] + temp["Middle 25%"] + temp["Top 25%"])
# temp["percentage_middle"] = temp["Middle 25%"] / (temp["Bottom 50%"] + temp["Middle 25%"] + temp["Top 25%"])
# temp["percentage_top"] = temp["Top 25%"] / (temp["Bottom 50%"] + temp["Middle 25%"] + temp["Top 25%"])
# temp

In [None]:
# df["test_revenue_modification"]=None
# df["test_revenue_modification"] = np.where(df["category"]=="Top 25%", df["properties.revenue"]*1, df["test_revenue_modification"])
# df["test_revenue_modification"] = np.where(df["category"]=="Middle 25%", df["properties.revenue"]*0.95, df["test_revenue_modification"])
# df["test_revenue_modification"] = np.where(df["category"]=="Bottom 50%", df["properties.revenue"]*0.9, df["test_revenue_modification"])
# def transform_to_change(rev, mod):
#     if rev == 0:
#         return 0
#     else:
#         return ((mod /rev)-1)*100
# df["change_in_perc_test"] = df[["properties.revenue","test_revenue_modification"]].apply(lambda x: transform_to_change(x["properties.revenue"], x["test_revenue_modification"]), axis=1)
# df

In [None]:
# fig = plt.figure()
# ax1 = fig.add_subplot(1,1,1)
# ax1.set_title(f"{customer}: brand vs. generic revenue change")
# sns.lineplot(data=df, x="session.date", y="change_in_perc_test", hue="brand traffic", ax=ax1)
# plt.grid(True)
# plt.tight_layout()
# plt.xticks(rotation=90)
#fig.savefig(f"{path_save}brand_vs_generic_change_in_perc_in_revenue_test.png")

## Checkout certain campaigns

In [426]:
# Signals of the campaigns listed under "campaign_insight", and the conversions whose
# signal.externalId belongs to one of those signals.
campaign_signals = signals[signals["relates_to.campaign.name"].isin(customer_dict[customer]["campaign_insight"])]
# Only the last expression of a cell is displayed, so the counts are printed explicitly.
print(campaign_signals["relates_to.campaign.name"].value_counts())
conversion_campaigns = conversions[conversions["signal.externalId"].isin(campaign_signals["externalId"].unique())]
conversion_campaigns.shape

In [448]:
# Targeting-history rows that belong to one of the campaigns listed under "campaign_insight".
insight_campaigns = customer_dict[customer]["campaign_insight"]
is_insight_campaign = df["relates_to.campaign.name"].isin(insight_campaigns)
df_campaign_insights = df[is_insight_campaign]
df_campaign_insights["relates_to.campaign.name"].value_counts()

In [449]:
# Left-join the campaign conversions on (conversion timestamp, anonymousId) and keep a fixed set of columns.
conversion_lookup = conversion_campaigns[["created","properties.revenue","anonymousId"]]
df_campaign_insights = df_campaign_insights.merge(
    conversion_lookup,
    how="left",
    left_on=["conv_created","anonymousId"],
    right_on=["created","anonymousId"],
    suffixes=("", "_conv"),
)
# NOTE(review): the right-hand "created" / "properties.revenue" columns receive the "_conv"
# suffix and are not part of the selection below, so the join adds no column that is kept -
# confirm whether the "_conv" revenue was meant to be used.
insight_columns = ["anonymousId","created","session.date","conv_prob","properties.revenue","value_based_conversion_value_witout_alpha","value_based_conversion_value","conv_created","relates_to.campaign.name"]
df_campaign_insights = df_campaign_insights[insight_columns]

In [454]:
# Keep rows whose "created" value sorts after "20251022" (string comparison) and compute the
# percentage change of the conversion value, with and without alpha, relative to the revenue.
# .copy() so the columns added below are written to an independent frame, not to a slice.
df_campaign_insights = df_campaign_insights[df_campaign_insights["created"].astype("string")>"20251022"].copy()
df_campaign_insights["properties.revenue"]=df_campaign_insights["properties.revenue"].astype("float")
df_campaign_insights["change_in_perc"] = ((df_campaign_insights["value_based_conversion_value"] / df_campaign_insights["properties.revenue"]) - 1)*100
df_campaign_insights["change_in_perc_witout_alpha"] = ((df_campaign_insights["value_based_conversion_value_witout_alpha"] / df_campaign_insights["properties.revenue"]) - 1)*100
df_campaign_insights[["created","session.date","properties.revenue","value_based_conversion_value_witout_alpha","change_in_perc","relates_to.campaign.name","change_in_perc_witout_alpha"]]

In [455]:
df_campaign_insights.groupby("relates_to.campaign.name")["change_in_perc_witout_alpha"].describe().transpose()   

In [456]:
df_campaign_insights.groupby("relates_to.campaign.name")["change_in_perc"].describe().transpose()   

In [None]:
df_campaign_insights.groupby("relates_to.campaign.name")["value_based_conversion_value"].describe().transpose()   

In [459]:
# Box plot of change_in_perc per "created" value, one box per campaign.
fig, ax1 = plt.subplots()
ax1.set_title(f"{customer}: change for certain campaigns")
sns.boxplot(data=df_campaign_insights, x="created", y="change_in_perc", hue="relates_to.campaign.name", ax=ax1)
# Alternative view: the same plot for the alpha-free change.
#sns.boxplot(data=df_campaign_insights, x="created", y="change_in_perc_witout_alpha", hue="relates_to.campaign.name", ax=ax1)
ax1.grid(True)
plt.xticks(rotation=90)
fig.savefig(f"{path_save}brand_vs_generic_certain_campaigns_change.png")

In [None]:
# One figure per campaign: value_based_conversion_value (solid) and properties.revenue (dotted)
# over session.date, saved under a file name that ends with the campaign name.
for campaign in df_campaign_insights["relates_to.campaign.name"].unique():
    campaign_rows = df_campaign_insights[df_campaign_insights["relates_to.campaign.name"]==campaign]
    fig, ax1 = plt.subplots()
    ax1.set_title(f"{customer}: change for certain campaigns original (:) vs. modified (-)")
    shared_line_options = dict(data=campaign_rows, x="session.date", hue="relates_to.campaign.name", ax=ax1, marker="o")
    sns.lineplot(y="value_based_conversion_value", **shared_line_options)
    sns.lineplot(y="properties.revenue", linestyle=":", **shared_line_options)
    ax1.set_ylabel("value")
    ax1.legend()
    ax1.grid(True)
    plt.xticks(rotation=90)
    fig.savefig(f"{path_save}brand_vs_generic_certain_campaigns_revenue_modified_{campaign}.png")

In [None]:
# Box plot of value_based_conversion_value per "created" value, one box per campaign.
fig, ax1 = plt.subplots()
ax1.set_title(f"{customer}: change for certain campaigns")
sns.boxplot(data=df_campaign_insights, x="created", y="value_based_conversion_value", hue="relates_to.campaign.name", ax=ax1)
ax1.grid(True)
plt.xticks(rotation=90)
fig.savefig(f"{path_save}brand_vs_generic_certain_campaigns_revenue_modified.png")

In [None]:
# Box plot of properties.revenue per "created" value, one box per campaign.
fig, ax1 = plt.subplots()
ax1.set_title(f"{customer}: change for certain campaigns")
sns.boxplot(data=df_campaign_insights, x="created", y="properties.revenue", hue="relates_to.campaign.name", ax=ax1)
ax1.grid(True)
plt.xticks(rotation=90)
fig.savefig(f"{path_save}brand_vs_generic_certain_campaigns_revenue.png")

# Apply Boost on Generic traffic
https://miro.com/app/board/uXjVJMhQHk8=/?moveToWidget=3458764646648291672&cot=14

## Exclude Brand Traffic

In [None]:
# # exclude brand traffic
# test_date = "20251030"
# df_filtered = df[df["created"].astype("string") == test_date]
# brand_traffic = df_filtered[df_filtered["brand traffic"] == True].sort_values(by="conv_prob", ascending=False)
# worst_n_percent = brand_traffic.groupby("session.date").size()*0
# worst_n_percent = worst_n_percent.astype("int")
# index_to_exclude = []
# for date in worst_n_percent.index:
#     temp_index = brand_traffic[brand_traffic["session.date"] == date].head(worst_n_percent[date]).index
#     print(f"date: {date} - temp_index: {len(temp_index)}")
#     index_to_exclude.extend(temp_index)
# print(f"index_to_exclude: {len(index_to_exclude)}")
# if len(index_to_exclude) != worst_n_percent.sum():
#     raise Exception("Index does not match")
# print(f"df shape: {df.shape}")
# df_filtered = df_filtered[~df_filtered.index.isin(index_to_exclude)]
# df.shape

In [None]:
# df_filtered.groupby("brand traffic")["conv_prob"].describe().transpose()

In [None]:
# df_filtered.groupby("brand traffic")["properties.revenue"].describe().transpose()

In [None]:
# fig = plt.figure()
# ax1 = fig.add_subplot(1,1,1)
# ax1.set_title(f"{customer}: brand (excluded 70%) vs. generic (signifcant = {signficant})")
# sns.boxplot(data=df_filtered, y="conv_prob", hue="brand traffic", ax=ax1)
# plt.grid(True)
# fig.savefig(f"{path_save}brand_vs_generic_boxplot_filtered.png")

In [None]:
# df.groupby("brand traffic")["conv_prob"].describe().transpose()

In [None]:
# df_filtered.groupby("brand traffic")["conv_prob"].describe().transpose()

## Adjust conversion value

In [None]:
def adjust_revenue(revenue, conversion_prob, top75_threshold, top50_threshold,
                   top_factor=1.05, middle_factor=1.00, bottom_factor=0.9):
    """Scale ``revenue`` by a factor chosen from the conversion probability tier.

    Parameters
    ----------
    revenue : number
        Revenue to adjust.
    conversion_prob : number
        Conversion probability that decides the tier.
    top75_threshold : number
        Probabilities at or above this value fall into the top tier.
    top50_threshold : number
        Probabilities at or above this value (but below ``top75_threshold``)
        fall into the middle tier; everything else is the bottom tier.
    top_factor, middle_factor, bottom_factor : number, optional
        Multipliers for the three tiers. The defaults (1.05, 1.00, 0.9)
        boost the top tier, keep the middle tier and reduce the bottom tier.

    Returns
    -------
    ``revenue`` multiplied by the factor of the matching tier.

    Notes
    -----
    A NaN probability compares False against both thresholds and therefore
    lands in the bottom tier.
    """
    if conversion_prob >= top75_threshold:
        return revenue * top_factor
    if conversion_prob >= top50_threshold:
        return revenue * middle_factor
    return revenue * bottom_factor

def adjust_brand_revenue(revenue, brand_weighting):
    """Return ``revenue`` multiplied by ``brand_weighting``."""
    weighted_revenue = revenue * brand_weighting
    return weighted_revenue

In [None]:
# df_filtered["counterfactual"] = df_filtered["conv_prob"]
# prob_stats = df_filtered["counterfactual"].describe()
# top75 = prob_stats["75%"]
# top50 = prob_stats["50%"]
# stats_brand_traffic = df_filtered[df_filtered["brand traffic"] == True]["counterfactual"].describe()
# brand_50 = stats_brand_traffic["50%"]
# print(f"Adjust brand with: {brand_50}")
# df_filtered["new_revenue"] = df_filtered[["brand traffic", "counterfactual", "properties.revenue"]].apply(lambda x: adjust_revenue(x["properties.revenue"], x["counterfactual"], top75, top50) if x["brand traffic"] == False else adjust_brand_revenue(x["properties.revenue"], brand_50), axis=1)

In [None]:
# df_filtered.groupby("brand traffic")[["properties.revenue","new_revenue"]].describe()

In [None]:
# fig = plt.figure()
# ax1 = fig.add_subplot(2,1,1)
# ax1.set_title(f"{customer}: revenue brand vs. generic")
# sns.boxplot(data=df_filtered, y="properties.revenue", hue="brand traffic", ax=ax1)
# plt.grid(True)
# ax2 = fig.add_subplot(2,1,2)
# ax2.set_title(f"{customer}: adjusted revenue brand vs. generic")
# sns.boxplot(data=df_filtered, y="new_revenue", hue="brand traffic", ax=ax2)
# plt.grid(True)
# fig.savefig(f"{path_save}brand_vs_generic_revenue_boxplot_filtered.png")

In [None]:
# fig = plt.figure()
# ax1 = fig.add_subplot(2,1,1)
# ax1.set_title(f"{customer}: revenue brand vs. generic")
# sns.boxplot(data=df_filtered, y="properties.revenue", hue="brand traffic", ax=ax1)
# plt.grid(True)
# plt.ylim(0, 400)
# ax2 = fig.add_subplot(2,1,2)
# ax2.set_title(f"{customer}: adjusted revenue brand vs. generic")
# sns.boxplot(data=df_filtered, y="new_revenue", hue="brand traffic", ax=ax2)
# plt.grid(True)
# plt.ylim(0, 400)

# fig.savefig(f"{path_save}brand_vs_generic_revenue_boxplot_filtered_zoom.png")

In [None]:
# Wie viel haben wir umverteilt an revenue?
# Umverteilung SUN vs. Magic -> warum ist die eine besser?
# Generic (100) + Brand (100) (Last Touch)