In [2]:
# Change the kernel's working directory (IPython `cd` automagic).
# NOTE(review): presumably this is the repository root, so that the
# `general_functions` package imported in the next cell resolves - confirm.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
cd /Users/karolinegriesbach/Documents/Innkeepr/Git/evaluation-and-execution-scripts/

In [3]:
import logging
import delta_sharing
import pandas as pd
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
import general_functions.databricks_client as db_client
from general_functions.call_api_with_account_id import send_to_innkeepr_api_paginated
from general_functions.return_account_ids import return_account_ids
from general_functions.constants import return_api_url

In [4]:
# Analysis configuration: the customer to inspect and the first day of the
# observation window (ISO "YYYY-MM-DD" string).
customer = "Nikin"
test_date = "2025-08-01"

# Base URL of the API, as provided by the project helper.
url = return_api_url()
print(f"url = {url}")

# Resolve the account id for `customer`. Each entry returned by
# return_account_ids() is read through its "id" and "name" keys.
matching_account_ids = [
    acc["id"] for acc in return_account_ids() if acc["name"] == customer
]
if not matching_account_ids:
    # Same exception type as indexing an empty list, but with a message that
    # says what went wrong.
    raise IndexError(f"No account found for customer {customer!r}")
# If several accounts share the name, the first match is used.
account_id = matching_account_ids[0]

In [10]:
# Window boundaries as "YYYY-MM-DD" strings: every 10th day starting at
# `test_date`, with today's date appended when the 10-day grid does not
# land on it.
end_date = datetime.today().date().strftime("%Y-%m-%d")
boundaries = pd.date_range(start=test_date, end=end_date, freq="10D")
date_range = list(boundaries.strftime("%Y-%m-%d"))
if date_range.count(end_date) == 0:
    date_range += [end_date]
date_range

In [14]:
# Download the sessions whose "created" value falls between consecutive
# entries of `date_range`; each request filters on $gte <= created < $lt.
# Windows are built from neighbouring boundary pairs, so no request is sent
# for a window whose two bounds are the same date.
# NOTE(review): assuming `$lt` is exclusive, sessions created on the last
# boundary date itself are not fetched - confirm that this is intended.
session_frames = []
for gte, lt in zip(date_range[:-1], date_range[1:]):
    print(f"gte: {gte}, lt: {lt}")
    content = {
        "created": {
            "$lt": lt,
            "$gte": gte
        }
    }
    temp = send_to_innkeepr_api_paginated(
        f"{url}/sessions/query",
        account_id,
        content,
        logging
    )
    window_frame = pd.json_normalize(temp)
    # Skip empty windows so they cannot influence dtypes during concatenation.
    if not window_frame.empty:
        session_frames.append(window_frame)

# Concatenate once (instead of growing a DataFrame inside the loop) and give
# the result a unique 0..n-1 index.
if session_frames:
    sessions = pd.concat(session_frames, ignore_index=True)
else:
    sessions = pd.DataFrame()
sessions

In [15]:
# Restrict the sessions frame to the five columns of interest.
session_columns = ["id", "created", "sessionId", "anonymousId", "campaign.gclid"]
sessions = sessions.loc[:, session_columns]
sessions

In [17]:
# Unique anonymousIds of the sessions that have a non-null campaign.gclid.
has_gclid = sessions["campaign.gclid"].notna()
anonymousId_list_sample = sessions.loc[has_gclid, "anonymousId"].unique().tolist()
print(len(anonymousId_list_sample))

In [20]:
# Fetch the profiles for every anonymousId present in `sessions`, in batches.
PROFILE_BATCH_SIZE = 5000  # number of anonymousIds sent per /profiles/query request

anonymousId_list_sample = sessions["anonymousId"].unique().tolist()

profile_frames = []
# range() yields no batch at all for an empty id list, so no request with an
# empty "anonymousId" filter is sent.
for start in range(0, len(anonymousId_list_sample), PROFILE_BATCH_SIZE):
    end = min(start + PROFILE_BATCH_SIZE, len(anonymousId_list_sample))
    print(f"start: {start}, end: {end}")
    list_sample = anonymousId_list_sample[start:end]
    temp = send_to_innkeepr_api_paginated(
        f"{url}/profiles/query",
        account_id,
        {"anonymousId": list_sample},
        logging
    )
    batch_frame = pd.json_normalize(temp)
    # Skip empty batches so they cannot influence dtypes during concatenation.
    if not batch_frame.empty:
        profile_frames.append(batch_frame)

# Concatenate once (instead of growing a DataFrame inside the loop) and give
# the result a unique 0..n-1 index.
if profile_frames:
    profiles = pd.concat(profile_frames, ignore_index=True)
else:
    profiles = pd.DataFrame()
profiles

In [21]:
# Column / key names used while reshaping the profiles frame.
cookieid = "id"                     # key read from each externalIds entry
col_name = "name"                   # key read from each externalIds entry
data_col_extIds = "externalIds"     # column holding the list of external-id entries
profiles_id = "_id"                 # new name for the profile's own "id" column
col_last_modified = "lastModified"
col_user_id = "anonymousId"

# Rename the profile's "id" first so it does not collide with the "id" column
# extracted from the externalIds entries below.
# NOTE(review): this cell is not idempotent - after one run `profiles` already
# has an "id" column, so re-run the profiles download cell before re-running it.
profiles = profiles.rename(columns={"id": profiles_id})
profiles = profiles[[profiles_id, col_user_id, data_col_extIds, col_last_modified]]

# One row per externalIds entry; an empty list becomes a single NaN row.
profiles = profiles.explode(data_col_extIds)

# Pull "id" and "name" out of every entry. Lists are assigned positionally, so
# the repeated index left by explode() is irrelevant, and non-dict entries
# (the NaN rows from empty lists) yield None instead of raising.
external_entries = profiles[data_col_extIds].tolist()
for entry_key in (cookieid, col_name):
    profiles[entry_key] = [
        entry[entry_key] if isinstance(entry, dict) else None
        for entry in external_entries
    ]
profiles

In [22]:
# Left-join the external-id "name" of each profile onto the sessions via
# anonymousId. A session is repeated once per matching profiles row; sessions
# without a matching profile keep a missing name.
profile_names = profiles[["anonymousId", "name"]]
sessions_merged = sessions.merge(profile_names, how="left", on="anonymousId")
sessions_merged

In [23]:
# Add the calendar day of each session and drop the rows whose name is
# "anonymousId"; rows with a missing name are kept.
created_day = pd.to_datetime(sessions_merged["created"]).dt.date
is_anonymous_entry = sessions_merged["name"].isin(["anonymousId"])
sessions_merged = sessions_merged.assign(date=created_day).loc[~is_anonymous_entry]

In [24]:
# Number of merged session rows per name.
name_counts = sessions_merged["name"].value_counts()
name_counts

In [25]:
# Count merged session rows per (date, name).
# size() + reset_index(name="count") names the count column explicitly.
# value_counts().reset_index() only yields a "count" column on pandas >= 2.0;
# older versions name the result after the counted column, which clashes with
# the "name" index level.
sessions_grouped = (
    sessions_merged
    .groupby(["date", "name"])
    .size()
    .reset_index(name="count")
)

# Plain strings so the plot treats the dates as categorical tick labels.
sessions_grouped["date"] = sessions_grouped["date"].astype(str)
sessions_grouped["name"] = sessions_grouped["name"].astype(str)

sessions_grouped = sessions_grouped.sort_values(by=["name", "date"])
sessions_grouped

In [28]:
# Line chart of the per-day row counts, one line per name.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=sessions_grouped, x="date", y="count", hue="name", style="name", ax=ax)

# Label each line once, at the first row in which its name appears.
first_points = sessions_grouped.drop_duplicates(subset="name", keep="first")
for label, x_pos, y_pos in zip(
    first_points["name"], first_points["date"], first_points["count"]
):
    ax.annotate(f"{label}", (x_pos, y_pos), ha="center", va="bottom")

plt.xticks(rotation=90)
ax.grid(True)
ax.legend()


In [None]:
# TODO: repeat this analysis for sessions that have a gclid only