In [69]:
cd /Users/karolinegriesbach/Documents/Innkeepr/Git/evaluation-and-execution-scripts/

In [70]:
import logging
import delta_sharing
import pandas as pd
import seaborn as sns
import numpy as np
from matplotlib import pyplot as plt

import general_functions.databricks_client as db_client
from general_functions.return_account_ids import return_account_ids
from general_functions.constants import return_api_url
from general_functions.call_api_with_account_id import call_api_with_accountId, send_to_innkeepr_api_paginated

In [71]:
# Base URL of the API; used below as the prefix for the "/models/query" and
# "/audiences/query" endpoints.
url = return_api_url()
print(f"url = {url}")
# Accounts to analyse; the loop below reads each entry's "id" and "name" keys.
accounts = return_account_ids()

In [84]:
def _first_model_dates_per_audience(models: pd.DataFrame) -> pd.DataFrame:
    """Build one row per audience with the earliest "created" value per model type.

    The result has an "audience" column, one column per value found in
    ``models["type"]`` (e.g. "conversion", "causal"), plus:

    * ``days_switch``: whole days between the first causal and the first
      conversion model. NaN when the account has causal models but no
      conversion models at all; 0 when the account has no causal models.
    * ``days_switch_cleaned``: ``days_switch`` where it is strictly positive,
      NaN otherwise (kept numeric so that ``describe()`` yields a mean).
    """
    # NOTE(review): min() runs on the raw "created" values before datetime
    # parsing -- assumes they sort chronologically (e.g. ISO-8601 strings); confirm.
    first_created = (
        models.groupby(by=["audience", "type"])["created"]
        .min()
        .reset_index()
        .pivot(index="audience", columns="type", values="created")
        .reset_index()
    )

    has_causal = "causal" in first_created.columns
    has_conversion = "conversion" in first_created.columns
    if has_causal and has_conversion:
        first_created["days_switch"] = (
            pd.to_datetime(first_created["causal"])
            - pd.to_datetime(first_created["conversion"])
        ).dt.days
    elif has_causal:
        # Only causal models exist: there is no conversion date to subtract.
        # (Previously this case raised a KeyError on "conversion".)
        first_created["days_switch"] = np.nan
    else:
        first_created["days_switch"] = 0

    # Series.where keeps the column numeric (NaN instead of None), so the
    # concatenated column does not degrade to object dtype.
    first_created["days_switch_cleaned"] = first_created["days_switch"].where(
        first_created["days_switch"] > 0
    )
    return first_created


# Collect one frame per account and concatenate once after the loop instead of
# growing `all_models` (and rewriting the CSV) on every iteration.
per_account_frames = []
for account in accounts:
    print(f"account = {account}")
    account_id = account["id"]
    models = call_api_with_accountId(
        f"{url}/models/query",
        account_id,
        {},
        logging
    )
    models = pd.json_normalize(models)
    if len(models) == 0:
        # Nothing to analyse for this account.
        continue

    model_type = _first_model_dates_per_audience(models)
    model_type["account"] = account["name"]

    # Look up the audiences referenced by this account's models to attach
    # their "source".
    audiences = call_api_with_accountId(
        f"{url}/audiences/query",
        account_id,
        {"id": models["audience"].dropna().unique().tolist()},
        logging
    )
    audiences = pd.json_normalize(audiences).rename(columns={"id": "audience"})
    if {"audience", "source"}.issubset(audiences.columns):
        model_type = pd.merge(
            model_type,
            audiences[["audience", "source"]].drop_duplicates(),
            how="left",
            on=["audience"],
        )
    else:
        # Empty audiences response (or one without "source"): keep the rows
        # and leave the source unknown instead of raising a KeyError.
        model_type["source"] = None

    per_account_frames.append(model_type)

if per_account_frames:
    # ignore_index avoids duplicate index labels across accounts.
    all_models = pd.concat(per_account_frames, ignore_index=True)
else:
    all_models = pd.DataFrame()

# Later cells read both type columns; make sure they exist even if no account
# had models of that type.
for model_kind in ("conversion", "causal"):
    if model_kind not in all_models.columns:
        all_models[model_kind] = None

if per_account_frames:
    # Written once, relative to the current working directory.
    all_models.to_csv("Analysis/time_conversion_to_causal_model_switch.csv", index=False)


In [86]:
# Audiences that have a causal model but never had a conversion model count as
# "causal from the beginning" and get a switch time of 0 days; all other rows
# keep their cleaned switch time.
causal_from_beginning = all_models["conversion"].isna() & all_models["causal"].notna()

# Coerce to numeric first: if the cleaned column arrives as object dtype
# (e.g. it contains None), np.where would also produce an object column and
# the later describe().sort_values(by="mean") would fail for lack of a "mean".
days_switch_cleaned = pd.to_numeric(all_models["days_switch_cleaned"], errors="coerce")

all_models["days_switch_causal_from_beginning"] = np.where(
    causal_from_beginning,
    0,
    days_switch_cleaned,
)

In [87]:
# Display the combined per-audience table (all accounts) for visual inspection.
all_models

In [88]:
# Row count of the combined table vs. number of rows with a non-null "causal" value.
print(f"number of entries: {len(all_models)} - audiences with causal models: {all_models['causal'].count()}")

In [91]:
# Descriptive statistics of the cleaned switch time per (account, source),
# ordered by mean switch time.
(
    all_models
    .groupby(["account", "source"])["days_switch_cleaned"]
    .describe()
    .sort_values("mean")
    .reset_index()
)

In [78]:
# Descriptive statistics of the switch time per account, with
# causal-from-the-beginning audiences counted as 0 days, ordered by mean.
(
    all_models
    .groupby(by="account")["days_switch_causal_from_beginning"]
    .describe()
    .sort_values("mean")
)