In [1]:
# Change the working directory to the local checkout of the repository
# (presumably so the `general_functions` imports in the next cell resolve — TODO confirm).
# NOTE(review): absolute, machine-specific path; adjust it when running on another machine.
cd /Users/karolinegriesbach/Documents/Innkeepr/Git/evaluation-and-execution-scripts/

In [2]:
import os
import ast
import logging
import numpy as np
import pandas as pd
import awswrangler as wr
import matplotlib.pyplot as plt
import seaborn as sns
from general_functions.return_account_ids import return_account_ids
from general_functions.sanitize_accout_name import sanitize_account_name
from general_functions.constants import return_api_url
from general_functions.call_api_with_account_id import call_api_with_accountId, send_to_innkeepr_api_paginated

In [3]:
# --- Run configuration ---------------------------------------------------------
# `customer` selects the entry of `customer_dict` used throughout the notebook:
#   brand_string       -> substring searched for in treatment fields / campaign names
#   conversionActionId -> id used to locate the targeting-history parquet files
customer = "to teach"
customer_dict = {
    "to teach": {
        "brand_string": "Brand Bidding",
        "conversionActionId": "68cbfa5577b55cfe740fbc8b",
    }
}
customer_sanitized = sanitize_account_name(customer)

# NOTE(review): absolute, machine-specific output directory.
path_save = f"/Users/karolinegriesbach/Documents/Innkeepr/Git/evaluation-and-execution-scripts/SprintStories/EN-2874-brand-vs-generic/data/{customer_sanitized}/"

# Inclusive evaluation window; `date_range` holds one "YYYYMMDD" string per day.
start_date = "20251025"
end_date = "20251101"
date_range = pd.date_range(start=start_date, end=end_date, freq="D").strftime("%Y%m%d").tolist()

# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(path_save, exist_ok=True)

url = return_api_url()
print(f"url = {url}")

# Resolve the account id by exact name match and fail with a readable message
# (instead of a bare IndexError) when the customer is unknown.
matching_account_ids = [acc["id"] for acc in return_account_ids() if acc["name"] == customer]
if not matching_account_ids:
    raise ValueError(f"No account named {customer!r} was returned by return_account_ids()")
account_id = matching_account_ids[0]

# Get Data

## Load Treatments

In [4]:
# Query the sources endpoint for the selected account with an empty query payload.
sources_endpoint = f"{url}/sources/query"
sources = call_api_with_accountId(sources_endpoint, account_id, {}, logging)
print(f"Found {len(sources)} sources")

In [5]:
# Fetch the treatments of every source and combine them into one flat DataFrame.
# The per-source frames are collected first and concatenated once; growing a
# DataFrame with pd.concat inside the loop copies all previous rows each time.
treatment_frames = []
for source in sources:
    source_treatments = send_to_innkeepr_api_paginated(
        f"{url}/treatments/query",
        account_id,
        {"source": source["id"]},
        logging,
    )
    # json_normalize flattens nested fields into dotted column names.
    treatment_frames.append(pd.json_normalize(source_treatments))

# pd.concat raises on an empty list, so fall back to an empty frame when there are no sources.
treatments = pd.concat(treatment_frames, ignore_index=True) if treatment_frames else pd.DataFrame()
treatments

In [6]:
# Keep only treatments for which ANY text column contains the customer's brand string.
brand_string = customer_dict[customer]["brand_string"]
brand_treatment_ids = set()
for col in treatments.columns:
    try:
        # na=False maps missing / non-string cells to False directly,
        # replacing the separate .fillna(False) on an object-dtype mask.
        is_brand = treatments[col].str.contains(brand_string, na=False).astype(bool)
    except AttributeError:
        # The .str accessor only exists for text-like columns; numeric or boolean
        # columns cannot contain the brand string, so they are skipped.
        continue
    matched_ids = treatments.loc[is_brand, "id"].unique().tolist()
    if matched_ids:
        print(f"Found ids for col {col}")
        brand_treatment_ids.update(matched_ids)

brand_treatments = list(brand_treatment_ids)
print(f"Found {len(brand_treatments)} brand treatments")
# From here on `treatments` holds only the brand treatments.
treatments = treatments[treatments["id"].isin(brand_treatments)]
treatments

## Load Signals

In [7]:
# Request the signals that relate to the campaigns of the remaining (brand) treatments
# and flatten the response into a DataFrame.
brand_campaign_names = treatments["relates_to.campaign.name"].dropna().unique().tolist()
signals_response = send_to_innkeepr_api_paginated(
    f"{url}/signals/query",
    account_id,
    {"relates_to.campaign.name": brand_campaign_names},
    logging,
)
signals = pd.json_normalize(signals_response)
signals

## Load Targeting History of Conversion Actions

In [8]:
# Load the targeting history of the configured conversion action.
# A local CSV acts as cache; when it is missing, the daily parquet files are read
# from S3 (one per date in `date_range`), combined, and written to the cache.
conversion_action_id = customer_dict[customer]["conversionActionId"]
data_file_path = f"{path_save}targeting_history_{customer}_{conversion_action_id}_{start_date}_{end_date}.csv"
try:
    df = pd.read_csv(data_file_path)
except FileNotFoundError:
    print("File not found, creating new DataFrame.")
    daily_frames = []
    for date in date_range:
        try:
            print(f"Reading data for {date}")
            daily = wr.s3.read_parquet(f"s3://{account_id}/targeting.history/{date}/ga_conversion_update_{conversion_action_id}.parquet")
        except wr.exceptions.NoFilesFound:
            print(f". No data for {date}")
            continue
        daily["bucket_date"] = date
        daily_frames.append(daily)
    if not daily_frames:
        # Writing an empty frame would leave an empty cache file that makes every
        # later pd.read_csv fail, so stop here without touching the cache.
        raise ValueError(
            f"No targeting history found for conversion action {conversion_action_id} "
            f"between {start_date} and {end_date}"
        ) from None
    # Concatenate once instead of growing the frame inside the loop.
    pd.concat(daily_frames).to_csv(data_file_path, index=False)
    # Re-read the cache so a fresh download yields exactly the same (stringified)
    # column values as a cached run; downstream parsing expects the CSV form.
    df = pd.read_csv(data_file_path)
df = df[["created","profile","anonymousId","treatment","conv_prob","session.date","session","gclid_externalIds"]]
# explode only expands list-like cells; scalar cells (e.g. strings read from the CSV) pass through unchanged.
df = df.explode("gclid_externalIds")
df

## Merge Targeting History and Signal Id

In [9]:
def return_gclid_id(x):
    """Return the ``"id"`` of the first gclid external-id entry in ``x``.

    Parameters
    ----------
    x : str | list | dict | Any
        Either the string form of a list of dicts (as read back from a CSV),
        an already-parsed list of dicts, or a single dict entry. Anything
        else (``None``, ``NaN``, numbers, ...) is treated as "no id".

    Returns
    -------
    The value stored under ``"id"`` in the first entry, or ``None`` when the
    input is missing, cannot be parsed, is empty, or has no ``"id"`` key.
    """
    if isinstance(x, str):
        try:
            x = ast.literal_eval(x)
        except (ValueError, SyntaxError, TypeError):
            # Not a valid Python literal (e.g. empty or malformed cell).
            return None
    if isinstance(x, dict):
        # A single entry is handled like a one-element list.
        x = [x]
    if not isinstance(x, list) or not x:
        # Covers None / NaN / scalars and the empty list.
        return None
    first_entry = x[0]
    if not isinstance(first_entry, dict):
        return None
    return first_entry.get("id")

In [10]:
# Attach campaign / ad-group names to each targeting-history row via the gclid external id,
# then flag rows whose campaign name contains the customer's brand string.
df["externalId"] = df["gclid_externalIds"].apply(return_gclid_id).astype("string")
signals["externalId"] = signals["externalId"].astype("string")

# Build a lookup with exactly one row per non-null externalId:
#  * null keys match each other in a pandas merge, so rows without a gclid would
#    otherwise be joined to every signal that lacks an externalId;
#  * duplicate keys would multiply targeting-history rows and distort the statistics.
# NOTE(review): the first signal per externalId wins — confirm that one gclid never
# maps to more than one campaign.
signal_campaigns = (
    signals[["externalId","relates_to.campaign.name","relates_to.adGroup.name"]]
    .dropna(subset=["externalId"])
    .drop_duplicates(subset="externalId")
)
# validate= makes pandas raise if the lookup is not unique per key after all.
df = pd.merge(df, signal_campaigns, on="externalId", how="left", validate="many_to_one")

# na=False: rows without a matched campaign count as non-brand traffic.
df["brand traffic"] = (
    df["relates_to.campaign.name"]
    .str.contains(customer_dict[customer]["brand_string"], na=False)
    .astype(bool)
)
df.head()

# Get Statistics

In [14]:
# Descriptive statistics of conv_prob, one column per value of the "brand traffic" flag.
conv_prob_by_traffic = df.groupby("brand traffic")["conv_prob"]
conv_prob_by_traffic.describe().T

In [12]:
# Histogram (with KDE overlay) of conv_prob on the y-axis, coloured by the
# "brand traffic" flag; the figure is written to the output directory as PNG.
fig, ax1 = plt.subplots()
sns.histplot(data=df, y="conv_prob", hue="brand traffic", kde=True, ax=ax1)
ax1.grid(True)
fig.tight_layout()
fig.savefig(f"{path_save}brand_vs_generic_histogram.png")

In [13]:
# Box plot of conv_prob split by the "brand traffic" flag; the figure is written
# to the output directory as PNG.
fig, ax1 = plt.subplots()
sns.boxplot(data=df, y="conv_prob", hue="brand traffic", ax=ax1)
ax1.grid(True)
fig.savefig(f"{path_save}brand_vs_generic_boxplot.png")