In [1]:
cd /Users/karolinegriesbach/Documents/Innkeepr/Git/evaluation-and-execution-scripts/

In [2]:
import yaml
import logging
import pandas as pd
import numpy as np
import awswrangler as wr
from matplotlib import pyplot as plt
import seaborn as sns
from general_functions.return_account_ids import return_account_ids
from general_functions.constants import return_api_url
from general_functions.sanitize_accout_name import sanitize_account_name
from general_functions.call_api_with_account_id import call_api_with_accountId, send_to_innkeepr_api_paginated

In [3]:
# --- Run configuration --------------------------------------------------------
customer = "Asambeauty"
conversion_action_id = "6914acfa3517fb74085e607b"
# Timestamp suffix that is part of the S3 parquet object name and of the local
# cache file name built below.
timestamp = "2026-01-2718:58:14.077011"
# Campaign names containing this text (case-insensitive) are labelled "brand".
brand_string = "Brand Bidding"
start_date = "20260127"
end_date = "20260127"
# Per strategy: logical threshold name -> column holding that threshold.
thresholds = {
    "campaign_based_count": {"threshold_probs_remove":"probability_medium", "threshold_probs_add_bottom": "probability_medium","threshold_probs_add_top": "probability_top"},
}
# One "YYYYMMDD" string per day in [start_date, end_date].
date_range = pd.date_range(start=start_date, end=end_date, freq="D").strftime("%Y%m%d").tolist()
url = return_api_url()
print(f"url = {url}")

# Resolve the customer's account id; fail with a clear message instead of an
# IndexError when no account carries that name.
matching_account_ids = [acc["id"] for acc in return_account_ids() if acc["name"] == customer]
if not matching_account_ids:
    raise ValueError(f"No account found for customer {customer!r}")
account_id = matching_account_ids[0]

data_file_path = f"DataChecks/targeting_history_ga_conversion_update/data/targeting_history_{customer}_test_check_{conversion_action_id}_{timestamp}.csv"
# NOTE(review): machine-specific absolute path; adjust per checkout.
path_to_configs = "/Users/karolinegriesbach/Documents/Innkeepr/Git/innkeepr-analytics/configs/customer_specifications.yaml"

# Load only this customer's section of the shared configuration file.
with open(path_to_configs, "r") as f:
    customer_configs = yaml.safe_load(f)[sanitize_account_name(customer)]

# Customers with a synthetic-conversions setup bring their own thresholds.
if "synthetic_conversions" in customer_configs:
    thresholds["synthetic_conversions"] = customer_configs["synthetic_conversions"]["thresholds"]
thresholds

In [4]:
# Query every Google Ads conversion action of the account, then keep only the
# entries whose id equals the configured `conversion_action_id`.
all_conversion_actions = call_api_with_accountId(
    f"{url}/sources/googleAdwords/conversionActions/query",
    accountID=account_id,
    content={},
    logger=logging,
)
conversion_actions = list(
    filter(lambda action: action["id"] == conversion_action_id, all_conversion_actions)
)
print(conversion_actions)

In [5]:
# Load the targeting history from the local CSV cache; on a cache miss, pull
# one parquet file per (date, conversion action) from S3 and build the cache.
try:
    data = pd.read_csv(data_file_path)
except FileNotFoundError:
    frames = []  # collected per-file frames, concatenated once after the loops
    for date in date_range:
        print(f"Date: {date}")
        for conversion_action in conversion_actions:
            # Local names on purpose: the configured `conversion_action_id`
            # must not be rebound by this loop.
            action_id = conversion_action["id"]
            action_name = conversion_action["name"]
            try:
                temp = wr.s3.read_parquet(f"s3://innkeepr-development/targeting.history/{date}/ga_conversion_update_{action_id}_{timestamp}.parquet")
            except wr.exceptions.NoFilesFound:
                print(f". No data for {date}")
                continue
            temp["conversion_action_name"] = action_name
            temp["conversion_action_id"] = action_id
            temp["s3_date"] = date
            frames.append(temp)
    if frames:
        # ignore_index gives the same RangeIndex a later read of the cache has.
        data = pd.concat(frames, ignore_index=True)
        data.to_csv(data_file_path, index=False)
    else:
        # Nothing found: do not write a cache file, because an empty CSV would
        # make the next pd.read_csv raise EmptyDataError instead of retrying S3.
        data = pd.DataFrame()
        print("No targeting history found on S3; nothing was cached.")
        

In [6]:
# Display the loaded targeting history (read from the CSV cache or from S3 in the previous cell).
data

# Add traffic type for sessions whose strategy is neither campaign_based nor campaign_based_count

In [7]:
def _ensure_columns(frame, columns):
    """Add every name in ``columns`` that ``frame`` lacks as an all-NaN object column; return ``frame``."""
    for column in columns:
        if column not in frame.columns:
            frame[column] = pd.Series(dtype="object")
    return frame


# Sessions whose strategy is neither campaign_based nor campaign_based_count
# get their traffic type derived here: session -> gclid -> signal -> campaign name.
uses_campaign_strategy = data["strategy"].isin(["campaign_based", "campaign_based_count"])
list_sessions_without_traffic_type = data.loc[~uses_campaign_strategy, "session"].unique().tolist()
if len(list_sessions_without_traffic_type) > 0:
    sessions_without_traffic_type = send_to_innkeepr_api_paginated(
        api_url=f"{url}/sessions/query",
        accountID=account_id,
        content={
            "sessionId": list_sessions_without_traffic_type
        },
        logger=logging
    )
    sessions_without_traffic_type = pd.json_normalize(sessions_without_traffic_type)
    # json_normalize only creates columns for keys present in the response, so
    # guarantee both columns exist before selecting them.
    sessions_without_traffic_type = _ensure_columns(
        sessions_without_traffic_type, ["sessionId", "campaign.gclid"]
    )[["sessionId", "campaign.gclid"]]
    gclids = sessions_without_traffic_type["campaign.gclid"].dropna().unique().tolist()
    print(f"len gclids: {len(gclids)}")
    if gclids:
        externalIds = send_to_innkeepr_api_paginated(
            api_url=f"{url}/signals/query",
            accountID=account_id,
            content={
                "externalId": gclids
            },
            logger=logging
        )
        externalIds = pd.json_normalize(externalIds)
    else:
        # No gclid to look up: skip the signals query entirely.
        externalIds = pd.DataFrame()
    externalIds = _ensure_columns(externalIds, ["externalId", "relates_to.campaign.name"])
    # na=False: a missing campaign name must count as "not brand". Without it
    # str.contains yields NaN, which np.where treats as truthy.
    is_brand = externalIds["relates_to.campaign.name"].str.contains(
        brand_string, case=False, na=False
    )
    externalIds["traffic_type"] = np.where(is_brand, "brand", "generic")
    # Drop exact duplicate (externalId, traffic_type) pairs so repeated signals
    # for one gclid do not multiply session rows in the merge. A gclid that has
    # both labels still yields two rows.
    traffic_type_by_gclid = externalIds[["externalId", "traffic_type"]].drop_duplicates()
    sessions_without_traffic_type = pd.merge(
        sessions_without_traffic_type,
        traffic_type_by_gclid,
        how="left",
        left_on="campaign.gclid",
        right_on="externalId"
    )
else:
    sessions_without_traffic_type = pd.DataFrame(columns=["sessionId","campaign.gclid","externalId","traffic_type"])

In [8]:
# Left-join the session-derived traffic type onto the targeting history.
# Columns from the right frame whose names already exist in `data` get the
# "_session" suffix.
session_traffic_types = sessions_without_traffic_type[["sessionId", "traffic_type", "externalId"]]
data = data.merge(
    session_traffic_types,
    how="left",
    left_on="session",
    right_on="sessionId",
    suffixes=("", "_session"),
)

# Sessions without a derived traffic type default to "generic".
session_type_missing = data["traffic_type_session"].isna()
data["traffic_type_session"] = np.where(session_type_missing, "generic", data["traffic_type_session"])

# Rows lacking their own traffic type take the session-derived one.
own_type_missing = data["traffic_type"].isna()
data["traffic_type"] = np.where(own_type_missing, data["traffic_type_session"], data["traffic_type"])

# NOTE(review): after the "generic" default above no NaN can remain here, so
# this "other" fallback never triggers; confirm which default is intended.
still_missing = data["traffic_type"].isna()
data["traffic_type"] = np.where(still_missing, "other", data["traffic_type"])

data["traffic_type"].value_counts(dropna=False)

# ConversionAction vs. traffic_type

In [9]:
# Keep only rows created on or after the cutoff. The comparison is on strings,
# so it is lexicographic.
# NOTE(review): assumes `created` is in YYYYMMDD form; confirm.
# .copy() detaches the result from the unfiltered frame, so the in-place column
# assignments in later cells do not raise SettingWithCopyWarning.
min_created = "20251218"
data = data.loc[data["created"].astype("string") >= min_created].copy()

In [10]:
# Count rows per (created, conversion_action_name, traffic_type), NaN traffic
# types included. Naming the counts column explicitly keeps it "count" (the
# column the plot below reads) regardless of the pandas version's default name.
data_by_date_and_traffic_type = (
    data.groupby(by=["created", "conversion_action_name"])["traffic_type"]
    .value_counts(dropna=False)
    .reset_index(name="count")
)
data_by_date_and_traffic_type["hue"] = data_by_date_and_traffic_type["conversion_action_name"]+"-"+data_by_date_and_traffic_type["traffic_type"]
data_by_date_and_traffic_type = data_by_date_and_traffic_type.sort_values(by=["conversion_action_name","traffic_type"]).reset_index(drop=True)
data_by_date_and_traffic_type

In [11]:
# One bar-chart panel per conversion action: row counts per `created` value,
# split by traffic type.
palette = {"brand": "#2ca02c", "generic":"#1f77b4", "other":"#ff7f0e"}
# Size the grid from the data instead of a fixed two rows, so any number of
# conversion actions fits; max(..., 1) keeps the figure valid when there are none.
n_panels = max(len(conversion_actions), 1)
fig = plt.figure(figsize=(8, 4 * n_panels))
for i, entry in enumerate(conversion_actions):
    conversion_action_name = entry["name"]
    ax = fig.add_subplot(n_panels, 1, i + 1)
    panel_data = data_by_date_and_traffic_type[
        data_by_date_and_traffic_type["conversion_action_name"] == conversion_action_name
    ]
    sns.barplot(data=panel_data, x="created", y="count", hue="traffic_type", palette=palette, ax=ax)
    ax.set_title(f"Count Traffic Type by Conversion Action {conversion_action_name}")
    # Style this panel's Axes directly rather than via pyplot's "current axes".
    ax.grid(True)
    ax.tick_params(axis="x", labelrotation=90)
# Lay out once, after all panels exist.
fig.tight_layout()
plt.show()


# ConversionAction vs. Threshold

In [12]:
def return_threshold(df, threshold_dict):
    """Average each threshold column per ``created`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``created`` column and every column named in the
        values of ``threshold_dict``. Its ``created`` column is cast to the
        pandas ``"string"`` dtype in place, so the caller's frame changes.
    threshold_dict : dict
        Maps a threshold name to the column of ``df`` holding its values.

    Returns
    -------
    dict
        ``{threshold name: {created value: mean of that column}}``.
    """
    # In-place cast: the grouping keys (and therefore the inner dict keys)
    # become strings.
    df["created"] = df["created"].astype("string")
    return {
        name: df.groupby("created")[column].mean().to_dict()
        for name, column in threshold_dict.items()
    }


In [13]:
# Collect thresholds per conversion action name. Only the two strategies below
# are handled; conversion actions of any other type are skipped.
conversion_action_thresholds = {}
for conversion_action in conversion_actions:
    conversion_action_strategy = conversion_action["type"]
    conversion_action_name = conversion_action["name"]
    if conversion_action_strategy not in ("campaign_based_count", "synthetic_conversions"):
        continue
    if conversion_action_strategy == "synthetic_conversions":
        # Configured thresholds are used as they are.
        conversion_action_thresholds[conversion_action_name] = thresholds[conversion_action_strategy]
        continue
    # campaign_based_count: thresholds are columns of `data`, averaged per `created` value.
    temp_thresholds = return_threshold(data, thresholds[conversion_action_strategy])
    conversion_action_thresholds[conversion_action_name] = temp_thresholds
conversion_action_thresholds

# ConversionAction vs. Probability

In [14]:
# Cast the numeric columns used by the plots and summaries below to float.
float_columns = [
    "final_multiplier",
    "avg_campaign_conv_prob",
    "conv_prob",
    "properties.revenue",
    "final_adjusted_revenue",
    "raw_multiplier",
    "probability_top",
]
for column in float_columns:
    data[column] = data[column].astype("float")


In [15]:
# Bars: conversion probability; dashed line: average campaign probability.
# Both are drawn on the same Axes, split by traffic type.
ax = sns.barplot(data=data, x="created", y="conv_prob", hue="traffic_type")
sns.lineplot(
    data=data,
    x="created",
    y="avg_campaign_conv_prob",
    hue="traffic_type",
    linestyle="--",
    marker="o",
    ax=ax,
)
ax.grid(True)
plt.setp(ax.get_xticklabels(), rotation=90)
ax.set_title(f"Probability over Time for \n Conversion Action {conversion_action_id} \n (line) average campaign probability, (bars) probability")

In [16]:
# Final multiplier per `created` value, split by traffic type.
ax = sns.barplot(data=data, x="created", y="final_multiplier", hue="traffic_type")
ax.grid(True)
plt.setp(ax.get_xticklabels(), rotation=90)
ax.set_title(f"Final Multiplier over Time for \n Conversion Action {conversion_action_id}")

In [17]:
# Descriptive statistics of the multiplier columns per traffic type,
# transposed so that traffic types become the columns.
multiplier_columns = ["raw_multiplier", "final_multiplier", "probability_top"]
data.groupby("traffic_type")[multiplier_columns].describe().T

In [18]:
# Total original and final adjusted revenue per traffic type.
revenue_sum_columns = ["properties.revenue", "final_adjusted_revenue"]
data.groupby("traffic_type")[revenue_sum_columns].sum()

In [19]:
# Descriptive statistics of original and final adjusted revenue per traffic
# type, transposed so that traffic types become the columns.
revenue_stat_columns = ["properties.revenue", "final_adjusted_revenue"]
data.groupby("traffic_type")[revenue_stat_columns].describe().T

In [20]:
# Original revenue (dashed, "o" markers) and final adjusted revenue (solid,
# "x" markers) per `created` value, split by traffic type, on one Axes.
ax = sns.lineplot(
    data=data,
    x="created",
    y="properties.revenue",
    hue="traffic_type",
    linestyle="--",
    color="g",
    marker="o",
)
sns.lineplot(
    data=data,
    x="created",
    y="final_adjusted_revenue",
    hue="traffic_type",
    linestyle="-",
    marker="x",
    ax=ax,
)
ax.set_ylabel("revenue")
ax.grid(True)
plt.setp(ax.get_xticklabels(), rotation=90)
ax.set_title(f"Revenue over Time for \n Conversion Action {conversion_action_id} \n (--) original, (-) final adjusted")