In [1]:
# Switch the working directory to the repository root (IPython `cd` automagic).
# Later cells use a path relative to this directory ("DataChecks/...").
# NOTE(review): this is a machine-specific absolute path - adjust it when
# running the notebook on another machine.
cd /Users/karolinegriesbach/Documents/Innkeepr/Git/evaluation-and-execution-scripts/

In [2]:
import logging
import pandas as pd
import numpy as np
import awswrangler as wr
from matplotlib import pyplot as plt
import seaborn as sns
from general_functions.return_account_ids import return_account_ids
from general_functions.constants import return_api_url
from general_functions.call_api_with_account_id import call_api_with_accountId, send_to_innkeepr_api_paginated

In [3]:
# IDs noted by the author for quick switching between customers:
#   Lillydoo   - 68c2d9007bd2ec4485bb98ed
#   Asambeauty - 682ed8362fc068cde38c3dff
#   Nikin      - 6834787b13526dc3d1017e06 (noted next to conversion_action_id;
#                presumably an alternative conversion action id - confirm)
conversion_action_id = "6914acfa3517fb74085e607b"
customer = "Asambeauty"
start_date = "20260101"
end_date = "20260202"
# One "YYYYMMDD" string per calendar day, both endpoints included.
date_range = pd.date_range(start=start_date, end=end_date, freq="D").strftime("%Y%m%d").tolist()
url = return_api_url()
print(f"url = {url}")
# Resolve the customer name to its account id. Fail with an explicit message
# instead of a bare IndexError when the name matches no account.
matching_account_ids = [acc["id"] for acc in return_account_ids() if acc["name"] == customer]
if not matching_account_ids:
    raise ValueError(f"No account named {customer!r} was returned by return_account_ids()")
account_id = matching_account_ids[0]

In [4]:
# Load the targeting history from the local CSV cache when it exists;
# otherwise read one parquet file per day from S3 and create the cache.
data_file_path = f"DataChecks/targeting_history_ga_conversion_update/data/targeting_history_{customer}_{conversion_action_id}_{start_date}_{end_date}.csv"
try:
    df = pd.read_csv(data_file_path)
except FileNotFoundError:
    print("File not found, creating new DataFrame.")
    # Collect the daily frames and concatenate once at the end; growing a
    # DataFrame with pd.concat inside the loop copies all rows on every pass.
    daily_frames = []
    for date in date_range:
        try:
            print(f"Reading data for {date}")
            temp = wr.s3.read_parquet(f"s3://{account_id}/targeting.history/{date}/ga_conversion_update_{conversion_action_id}.parquet")
        except wr.exceptions.NoFilesFound:
            print(f". No data for {date}")
            continue
        # Remember which daily S3 folder each row was read from.
        temp["bucket_date"] = date
        daily_frames.append(temp)
    if daily_frames:
        # ignore_index gives the same fresh 0..n-1 index that a later
        # read_csv of the cache produces (the cache is written without index).
        df = pd.concat(daily_frames, ignore_index=True)
        df.to_csv(data_file_path, index=False)
    else:
        # Do not write an empty cache file: once it exists, the try branch
        # above would never reach S3 again on later runs.
        print("No data found for the whole date range, cache file not written.")
        df = pd.DataFrame()
df

In [5]:
# Show the column labels of the loaded frame.
df.columns

In [6]:
# Normalise dtypes: `created` becomes a pandas string column, the numeric
# measures become floats. `alpha` is only cast when the column is present.
df["created"] = df["created"].astype("string")
float_columns = ["conv_prob", "properties.revenue", "final_adjusted_revenue"]
if "alpha" in df.columns:
    float_columns.insert(0, "alpha")
for float_column in float_columns:
    df[float_column] = df[float_column].astype("float")


In [7]:
# Print the smallest and largest `session.date` value in the data.
first_session_date = df["session.date"].min()
last_session_date = df["session.date"].max()
print(f"Date Range: {first_session_date} -  {last_session_date}")
# Sum original and adjusted revenue over one row per `session`
# (drop_duplicates keeps the first row of each session).
one_row_per_session = df.drop_duplicates(subset=["session"])
one_row_per_session[["properties.revenue", "final_adjusted_revenue"]].sum().values

In [8]:
# Original revenue summed per (created, session.date) pair, listed by
# session date and then by created.
df["properties.revenue"] = df["properties.revenue"].astype("float")
(
    df.groupby(by=["created", "session.date"])["properties.revenue"]
    .sum()
    .reset_index()
    .sort_values(by=["session.date", "created"])
)

In [9]:
# Final adjusted revenue summed per (created, session.date) pair, listed by
# session date and then by created.
(
    df.groupby(by=["created", "session.date"])["final_adjusted_revenue"]
    .sum()
    .reset_index()
    .sort_values(by=["session.date", "created"])
)

In [10]:
# Number of non-null `session.date` values for each `created` value.
session_dates_per_created = df.groupby(by=["created"])["session.date"]
session_dates_per_created.count()

In [11]:
# Bar chart: non-null `anonymousId` count for each `created` value.
anonymous_ids_per_created = df.groupby("created")["anonymousId"].count()
count_axes = anonymous_ids_per_created.plot.bar()
count_axes.grid(True)

In [12]:
# Line chart: mean `conv_prob` for each `created` value, y axis fixed to [0, 1].
mean_conv_prob = df.groupby("created")["conv_prob"].mean()
conv_prob_axes = mean_conv_prob.plot(kind="line", marker="o")
conv_prob_axes.grid(True)
conv_prob_axes.set_title(f"Average Conversion Probability over Time for \n Conversion Action {conversion_action_id}")
conv_prob_axes.set_ylim(0, 1)
plt.xticks(rotation=90)

In [13]:
# Count rows per (created, traffic_type); missing traffic types are counted
# as their own group (dropna=False).
# The count column is named explicitly: the following barplot reads a column
# called "count", and only pandas >= 2.0 gives the value_counts result that
# name by default (older versions reuse "traffic_type", which makes a plain
# reset_index() fail on the duplicate column name).
count_brand_traffic = (
    df.groupby(by=["created"])["traffic_type"]
    .value_counts(dropna=False)
    .reset_index(name="count")
)
count_brand_traffic

In [14]:
# Grouped bars: row count per `created` value, one bar colour per traffic type.
traffic_axes = sns.barplot(data=count_brand_traffic, x="created", y="count", hue="traffic_type")
traffic_axes.grid(True)
plt.xticks(rotation=90)

In [15]:
# Display the current state of the frame.
df

In [16]:
# Cast the multiplier and the campaign-level probability to float.
for numeric_column in ("final_multiplier", "avg_campaign_conv_prob"):
    df[numeric_column] = df[numeric_column].astype("float")


In [17]:
# Grouped bars: `final_multiplier` per `created` value, split by traffic type.
multiplier_axes = sns.barplot(data=df, x="created", y="final_multiplier", hue="traffic_type")
multiplier_axes.grid(True)
plt.xticks(rotation=90)
multiplier_axes.set_title(f"Final Multiplier over Time for \n Conversion Action {conversion_action_id}")

In [18]:
# Summary statistics of `avg_campaign_conv_prob` for each traffic type.
campaign_prob_by_traffic = df.groupby("traffic_type")["avg_campaign_conv_prob"]
campaign_prob_by_traffic.describe()

In [19]:
# Rows whose traffic type is "brand", reduced to the two columns of interest.
is_brand = df["traffic_type"] == "brand"
df.loc[is_brand, ["created", "avg_campaign_conv_prob"]]

In [20]:
# Bars show `conv_prob`; the dashed line with markers, drawn on the same
# axes, shows `avg_campaign_conv_prob`. Both are split by traffic type.
prob_axes = sns.barplot(data=df, x="created", y="conv_prob", hue="traffic_type")
sns.lineplot(
    data=df,
    x="created",
    y="avg_campaign_conv_prob",
    hue="traffic_type",
    linestyle="--",
    marker="o",
    ax=prob_axes,
)
prob_axes.grid(True)
plt.xticks(rotation=90)
prob_axes.set_title(f"Probability over Time for \n Conversion Action {conversion_action_id} \n (line) average campaign probability, (bars) probability")

In [21]:
# Dashed lines show the original revenue, solid lines the final adjusted
# revenue; both are drawn on the same axes and split by traffic type.
# NOTE(review): with `hue` set, color="g" is presumably superseded by the hue
# palette - confirm whether the argument has any visible effect.
revenue_axes = sns.lineplot(
    data=df, x="created", y="properties.revenue", hue="traffic_type", linestyle="--", color="g"
)
sns.lineplot(
    data=df, x="created", y="final_adjusted_revenue", hue="traffic_type", linestyle="-", ax=revenue_axes
)
revenue_axes.set_ylabel("revenue")
revenue_axes.grid(True)
plt.xticks(rotation=90)
revenue_axes.set_title(f"Revenue over Time for \n Conversion Action {conversion_action_id} \n (--) original, (-) final adjusted")

In [22]:
# Mean original and mean final adjusted revenue for each traffic type.
revenue_by_traffic = df.groupby("traffic_type")[["properties.revenue", "final_adjusted_revenue"]]
revenue_by_traffic.mean()

In [23]:
# Total original vs. final adjusted revenue per traffic type, with the
# absolute and the relative change.
revenue_diff = df.groupby("traffic_type")[["properties.revenue", "final_adjusted_revenue"]].sum()
# Both change columns use the same direction (adjusted relative to original),
# so a reduction is negative in "diff" as well as in "diff in %". Previously
# "diff" was original - adjusted and carried the opposite sign of the
# percentage column.
revenue_diff["diff"] = revenue_diff["final_adjusted_revenue"] - revenue_diff["properties.revenue"]
revenue_diff["diff in %"] = revenue_diff["final_adjusted_revenue"] / revenue_diff["properties.revenue"] * 100 - 100
revenue_diff