In [1]:
cd /Users/karolinegriesbach/Documents/Innkeepr/Git/evaluation-and-execution-scripts/

In [2]:
import logging
import os

import awswrangler as wr
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

from general_functions.call_api_with_account_id import call_api_with_accountId, send_to_innkeepr_api_paginated
from general_functions.constants import return_api_url
from general_functions.return_account_ids import return_account_ids

In [12]:
# --- Configuration -----------------------------------------------------------
# Accounts to inspect (later cells read each entry's "id" and "name") and the
# base URL that the API endpoints are appended to.
account_ids = return_account_ids()
url = return_api_url()

# Output directory for the CSV export and the figures written by later cells.
# It is relative to the current working directory, and the trailing slash is
# required because file names are appended by plain string formatting.
path_to_save = "DataChecks/audiencesetting/"

# Create the directory up front so the later `to_csv` / `savefig` calls do not
# fail on a checkout where it does not exist yet.
os.makedirs(path_to_save, exist_ok=True)

In [34]:
# Account whose sources provide the source id -> source name lookup table.
SOURCE_ACCOUNT_NAME = "Rosental"

# Resolve the account id by name. Fail with a readable message (same exception
# type as plain list indexing) instead of a bare "list index out of range"
# when no account carries that name.
matching_account_ids = [
    account["id"] for account in account_ids if account["name"] == SOURCE_ACCOUNT_NAME
]
if not matching_account_ids:
    raise IndexError(f"No account named {SOURCE_ACCOUNT_NAME!r} found in account_ids")
account_id = matching_account_ids[0]

# Query that account's sources and keep only id and name, renamed so the frame
# can be joined on a "source" column and contributes a "source_name" column.
# NOTE(review): this lookup comes from a single account but is merged onto the
# models of every account in the models cell -- confirm that source ids are
# shared across accounts, otherwise "source_name" will be NaN for the others.
sources_response = call_api_with_accountId(f"{url}/sources/query", account_id, {}, logging)
sources = pd.json_normalize(sources_response)[["id", "name"]].rename(
    columns={"id": "source", "name": "source_name"}
)
sources

In [36]:
# For every account: fetch its models, keep the most recently created model of
# each audience, and enrich it with the audience's name/source/status and the
# source's name. The per-account frames are collected in a list and
# concatenated once at the end, which avoids re-copying the accumulated frame
# on every iteration.
model_frames = []
for account in account_ids:
    account_id = account["id"]
    account_name = account["name"]
    print(f"{account_name}")

    response = call_api_with_accountId(f"{url}/models/query", account_id, {}, logging)
    models = pd.json_normalize(response)
    if models.empty:
        # No models returned for this account: nothing to add.
        continue

    # Latest "created" value per audience, broadcast back onto every row, then
    # used to keep only the newest model(s) of each audience. The aggregation
    # is named as the string "max" rather than passing the builtin `max`,
    # which newer pandas versions flag with a FutureWarning.
    models["max_date"] = models.groupby("audience")["created"].transform("max")
    models = models[models["created"] == models["max_date"]]

    # Audience metadata for this account; "id" is renamed to "audience" so it
    # matches the join key on the models frame.
    audiences_response = call_api_with_accountId(f"{url}/audiences/query", account_id, {}, logging)
    audiences = pd.json_normalize(audiences_response).rename(columns={"id": "audience"})
    audiences = audiences[["audience", "name", "source", "status"]]

    # Left joins: every model row is kept even without a matching audience or
    # source (the added columns are then NaN).
    models = pd.merge(models, audiences, how="left", on=["audience"])
    models = pd.merge(models, sources, how="left", on=["source"])
    model_frames.append(models)

# `pd.concat` rejects an empty list, so fall back to an empty frame when no
# account returned any model.
all_models = pd.concat(model_frames) if model_frames else pd.DataFrame()
all_models.to_csv(f"{path_to_save}all_models.csv")

In [38]:
# Keep only the rows whose "status" is "active" and renumber them from 0.
is_active = all_models["status"] == "active"
all_models = all_models.loc[is_active].reset_index(drop=True)

In [45]:
# Keep "audienceSize" only on rows that have no "audienceSizePercentage";
# everywhere else it is blanked to NaN. The column is then stored as float.
# NOTE(review): presumably the percentage setting takes precedence over the
# absolute size when both are present -- confirm.
has_no_percentage = all_models["audienceSizePercentage"].isnull()
all_models["audienceSize"] = (
    all_models["audienceSize"].where(has_no_percentage).astype("float")
)
all_models

In [57]:
# Split the models by their "type" column; the two subsets feed the per-type
# panels of the plots that follow.
model_type = all_models["type"]
causal_models = all_models.loc[model_type == "causal"]
conversion_models = all_models.loc[model_type == "conversion"]

In [95]:
# Count plot: number of models per source, one bar colour per model type.
fig, ax = plt.subplots(figsize=(6, 4))
ax.set_title("Number of models per source")
sns.countplot(data=all_models, x="source_name", hue="type", ax=ax)

# Source names are shown vertically on the x axis.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(90)
ax.grid(True)

# The legend is anchored outside the axes on the right; bbox_inches="tight"
# keeps it inside the saved image.
ax.legend(bbox_to_anchor=(1.65, 1))
fig.tight_layout()
fig.savefig(f"{path_to_save}number_of_models_per_source.png", bbox_inches="tight")

In [90]:
# Two stacked count plots of the "audienceSize" setting, coloured by source:
# causal models on top, conversion models below.
fig = plt.figure(figsize=(8, 5))

# Top panel: causal models.
ax = fig.add_subplot(2, 1, 1)
ax.set_title("AudienceSize Causal Models")
sns.countplot(data=causal_models, x="audienceSize", hue="source_name", ax=ax)
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(90)
# An x-limit of [0, 160000] was tried for this panel and is left disabled.
ax.grid(True)
ax.legend(bbox_to_anchor=(1.05, 1))

# Bottom panel: conversion models.
ax2 = fig.add_subplot(2, 1, 2)
ax2.set_title("AudienceSize Conversion Models")
sns.countplot(data=conversion_models, x="audienceSize", hue="source_name", ax=ax2)
for tick_label in ax2.get_xticklabels():
    tick_label.set_rotation(90)
# An x-limit of [0, 160000] was tried for this panel and is left disabled.
ax2.grid(True)

# Order matters for the rendered layout: tight_layout runs before the bottom
# legend is attached.
fig.tight_layout()
ax2.legend(bbox_to_anchor=(1.35, 1))
fig.savefig(f"{path_to_save}number_of_audienceSize_per_source.png", bbox_inches="tight")

In [88]:
# Two stacked count plots of the "audienceSizePercentage" setting, coloured by
# source: causal models on top, conversion models below.
fig = plt.figure(figsize=(8, 5))

# Top panel: causal models. Tick labels keep their default orientation here
# (rotation is only applied to the bottom panel).
ax = fig.add_subplot(2, 1, 1)
ax.set_title("AudienceSizePercentage Causal Models")
sns.countplot(data=causal_models, x="audienceSizePercentage", hue="source_name", ax=ax)
ax.grid(True)
ax.legend(bbox_to_anchor=(1.05, 1))

# Bottom panel: conversion models, with vertical tick labels.
ax2 = fig.add_subplot(2, 1, 2)
ax2.set_title("AudienceSizePercentage Conversion Models")
sns.countplot(data=conversion_models, x="audienceSizePercentage", hue="source_name", ax=ax2)
for tick_label in ax2.get_xticklabels():
    tick_label.set_rotation(90)
# An x-limit of [0, 0.65] was tried for this panel and is left disabled.
ax2.grid(True)

# Order matters for the rendered layout: tight_layout runs before the bottom
# legend is attached.
fig.tight_layout()
ax2.legend(bbox_to_anchor=(1.35, 1))
fig.savefig(f"{path_to_save}number_of_audienceSizePercentage_per_source.png", bbox_inches="tight")

In [84]:
# Two stacked count plots of the "targetingOutlookDays" setting, coloured by
# source: causal models on top, conversion models below.
fig = plt.figure(figsize=(8, 5))

# Top panel: causal models.
ax = fig.add_subplot(2, 1, 1)
ax.set_title("targetingOutlookDays Causal Models")
sns.countplot(data=causal_models, x="targetingOutlookDays", hue="source_name", ax=ax)
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(90)
ax.grid(True)
ax.legend(bbox_to_anchor=(1.35, 1))

# Bottom panel: conversion models.
ax2 = fig.add_subplot(2, 1, 2)
ax2.set_title("targetingOutlookDays Conversion Models")
sns.countplot(data=conversion_models, x="targetingOutlookDays", hue="source_name", ax=ax2)
for tick_label in ax2.get_xticklabels():
    tick_label.set_rotation(90)
ax2.grid(True)
ax2.legend(bbox_to_anchor=(1.35, 1))

# Both legends are already attached when the layout is tightened.
fig.tight_layout()
fig.savefig(f"{path_to_save}number_of_targetingOutlookDays_per_source.png", bbox_inches="tight")