In [13]:
# IPython `cd` magic: switch the kernel's working directory to the repository checkout.
# NOTE(review): presumably needed so the project-local `general_functions`
# package imported in the next cell can be resolved -- confirm.
cd /Users/karolinegriesbach/Documents/Innkeepr/Git/evaluation-and-execution-scripts/

In [14]:
import logging
import pandas as pd
from datetime import datetime, timedelta
from general_functions.return_account_ids import return_account_ids
from general_functions.constants import return_api_url
from general_functions.call_api_with_account_id import call_api_with_accountId, send_to_innkeepr_api_paginated
from general_functions.datetime_helper import transform_date_to_timestamp_milliseconds


In [15]:
# --- Notebook configuration -------------------------------------------------
# API base URL and the list of accounts the evaluation iterates over.
url = return_api_url()
accounts = return_account_ids()

# Evaluation window as ISO date strings. The trailing comments keep the
# relative-date expressions that were used before the window was pinned.
end_date = "2025-07-25"  # datetime.today().date() + timedelta(days=10)
start_date = "2025-01-20"  # end_date - timedelta(days=31*6)
print(f"start_date: {start_date}, end_date: {end_date}")

# Start dates (YYYY-MM-DD strings) of consecutive 10-day steps from
# `start_date` up to at most `end_date`.
date_range = (
    pd.date_range(start=start_date, end=end_date, freq="10D")
    .strftime("%Y-%m-%d")
    .tolist()
)

# Declared here but only referenced from commented-out code in this notebook.
ignore_account = []

# Directory prefix (with trailing slash) under which all CSV files are stored.
path_to_save = (
    "/Users/karolinegriesbach/Documents/Innkeepr/Git/"
    "evaluation-and-execution-scripts/SprintStories/PRD-2581-Pricing/"
)

In [16]:
# Accumulator for the evaluation result; starts out empty and without columns.
# Intended columns once filled:
# month, goal, name, conv_count, conv_count_with_gclid, account
res = pd.DataFrame()

In [17]:
def load_conversions(from_date, to_date, account_id):
    """Load the conversions of one account created within a date window.

    Both dates are converted with `transform_date_to_timestamp_milliseconds`
    and sent as an inclusive `$gte` / `$lte` filter on `created` to the
    `/conversions/query` endpoint.

    Args:
        from_date: Lower bound of the window (passed to the timestamp helper).
        to_date: Upper bound of the window (passed to the timestamp helper).
        account_id: Account the request is issued for.

    Returns:
        A DataFrame with the columns `created`, `name`, `sessionId` and
        `anonymousId`. If the response normalises to an empty frame, that
        empty frame is returned unchanged.
    """
    lower_bound = transform_date_to_timestamp_milliseconds(from_date)
    upper_bound = transform_date_to_timestamp_milliseconds(to_date)
    query = {"created": {"$gte": lower_bound, "$lte": upper_bound}}

    records = send_to_innkeepr_api_paginated(
        f"{url}/conversions/query", account_id, query, logging
    )
    conversions_df = pd.json_normalize(records)

    if not conversions_df.empty:
        # Keep only the fields the evaluation works with.
        return conversions_df[["created", "name", "sessionId", "anonymousId"]]

    print(f" ... conversions empty: {conversions_df}")
    return conversions_df

def return_goals(goals: list, account_id):
    """Query the `/goals/query` endpoint for the given goal ids.

    Args:
        goals: Goal ids; sent to the API as the value of the `id` filter.
        account_id: Account the request is issued for.

    Returns:
        Whatever the paginated API helper returns for this query.
    """
    endpoint = f"{url}/goals/query"
    id_filter = {"id": goals}
    return send_to_innkeepr_api_paginated(endpoint, account_id, id_filter, logging)

def return_model_goals(account_id):
    """Return the goals referenced by the models of an account.

    All models of the account are queried from `/models/query`; the distinct
    values of their `goal` field are then resolved via `return_goals`.

    Args:
        account_id: Account the requests are issued for.

    Returns:
        The result of `return_goals` for the distinct goal ids, or an empty
        list when the models response normalises to an empty frame.
    """
    models_df = pd.json_normalize(
        call_api_with_accountId(f"{url}/models/query", account_id, {}, logging)
    )
    if not models_df.empty:
        goal_ids = models_df["goal"].unique().tolist()
        return return_goals(goal_ids, account_id)
    return []

def get_sessions_with_glicd(account_id, list_sessions, chunk_size=50000):
    """Return the sessions out of `list_sessions` that carry a Google click id.

    The session ids are sent to `/sessions/query` in chunks of at most
    `chunk_size` ids. Of every response only the rows with a non-null
    `campaign.gclid` are kept.

    Args:
        account_id: Account the requests are issued for.
        list_sessions: Session ids to look up (any sliceable sequence).
        chunk_size: Maximum number of session ids per request; must be >= 1.

    Returns:
        A DataFrame with the columns `sessionId` and `campaign.gclid`. The
        frame always has these two columns, also when `list_sessions` is empty
        or no session has a gclid, so callers can merge on `sessionId`.

    Raises:
        ValueError: If `chunk_size` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

    result_columns = ["sessionId", "campaign.gclid"]
    print(f"list_sessions = {len(list_sessions)}")

    source_parameter = call_api_with_accountId(
        f"{url}/sources/query",
        account_id,
        {"name": "googleAdwords"},
        logging,
    )
    try:
        tracking_param = source_parameter[0]["options"]["urlCampaignParam"]
    except (KeyError, IndexError, TypeError):
        # IndexError: no googleAdwords source at all (empty response);
        # TypeError: response or its "options" entry is None.
        print("No googleAdwords urlCampaignParam source found")
        tracking_param = None
    # The parameter is only reported here; it is not used for the lookup below.
    print(f"tracking_param: {tracking_param}")

    total = len(list_sessions)
    if total == 0:
        # range() rejects a step of 0, so there is nothing to chunk.
        return pd.DataFrame(columns=result_columns)

    intervall = min(chunk_size, total)
    print(f"use intervall: {intervall}")

    chunks_with_gclid = []
    for i in range(0, total, intervall):
        end = min(i + intervall, total)
        print(f"i: {i}, end: {end}")
        temp_sessions = list_sessions[i:end]
        print(len(temp_sessions), temp_sessions[0:10])
        temp = send_to_innkeepr_api_paginated(
            f"{url}/sessions/query",
            account_id,
            {"sessionId": temp_sessions},
            logging,
        )
        temp = pd.json_normalize(temp)
        # Chunks in which no session has campaign data lack the gclid column.
        if not set(result_columns).issubset(temp.columns):
            continue
        chunks_with_gclid.append(
            temp[result_columns].dropna(subset=["campaign.gclid"])
        )

    if not chunks_with_gclid:
        return pd.DataFrame(columns=result_columns)
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(chunks_with_gclid)

In [18]:
# Per-account evaluation: for every account that has model goals, load its
# conversions (cached CSV or API), attach the gclid of the converting session
# and count distinct converting sessions per month and goal -- in total and
# restricted to sessions with a gclid. The counts are appended to the
# notebook-level `res` frame, which is written to disk after every processed
# account.

# Months dropped from the evaluation.
# NOTE(review): presumably excluded because the month of `end_date` is only
# partially covered -- confirm, and derive it from `end_date` if so.
excluded_months = ["2025-07"]
session_columns = ["sessionId", "campaign.gclid"]

for entryi, account in enumerate(accounts):
    account_id = account["id"]
    account_name = account["name"]
    print(f"{entryi}/{len(accounts)}: Account {account_name}")
    goals = return_model_goals(account_id)
    print(f" .. gaols: {len(goals)}: {goals}")
    if len(goals) == 0:
        print(f"... no goals found for account {account_name}")
        continue

    # --- conversions: reuse a cached CSV if there is one ---------------------
    conversions_file = f"conversions_{account_name}_{start_date}_{end_date}.csv"
    conversions = None
    # The download branch below writes its CSV directly into `path_to_save`,
    # whereas the lookup used to check only the "Backup/" sub-folder, so a
    # freshly written file was never reused. Check both, "Backup/" first.
    for cached_path in (
        f"{path_to_save}Backup/{conversions_file}",
        f"{path_to_save}{conversions_file}",
    ):
        try:
            cached = pd.read_csv(cached_path)
        except (FileNotFoundError, pd.errors.EmptyDataError):
            continue
        if "account" in cached.columns:
            cached = cached[cached["account"] == account_name]
            print(f"Loaded conversions: {len(cached)}, {cached['account'].value_counts()}")
        conversions = cached
        break

    # --- conversions: otherwise download them window by window ---------------
    if conversions is None:
        window_frames = []
        for idate, date in enumerate(date_range):
            # Each window ends where the next one starts; the last one ends at
            # `end_date`. Both bounds are inclusive in `load_conversions`, so
            # a conversion on a boundary may be loaded twice -- the counts
            # below use nunique() and are not affected by that.
            if idate == len(date_range) - 1:
                conv_to_date = end_date
            else:
                conv_to_date = date_range[idate + 1]
            print(f"... loading conversions from {date} to {conv_to_date}")
            temp = load_conversions(date, conv_to_date, account_id)
            print(f" ... {len(temp)} conversions found")
            window_frames.append(temp.assign(account=account_name))
            # ignore_account.append(account_id)
        # Newest window first, matching the previous prepend-in-loop order.
        conversions = pd.concat(window_frames[::-1]) if window_frames else pd.DataFrame()
        conversions.to_csv(f"{path_to_save}{conversions_file}")

    # The emptiness check has to come before "created" is accessed: a frame
    # built only from empty responses has no "created" column at all.
    if conversions.empty or "created" not in conversions.columns:
        print(f"... no conversions found for account {account_name}")
        continue
    conversions = conversions.assign(
        month=pd.to_datetime(conversions["created"]).dt.strftime("%Y-%m")
    )
    conversions = conversions[~conversions["month"].isin(excluded_months)]
    if conversions.empty:
        print(f"... no conversions found for account {account_name}")
        continue
    sessions = conversions["sessionId"].dropna().unique().tolist()

    # --- sessions with gclid: cached CSV or API -------------------------------
    sessions_file = f"{path_to_save}sessions_with_gclid_{account_name}_{start_date}_{end_date}.csv"
    try:
        print("Read data from file: ")
        sessions_with_gclid = pd.read_csv(sessions_file)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # EmptyDataError: a cache file written from a column-less frame.
        print("File not found, creating new DataFrame.")
        if sessions:
            sessions_with_gclid = get_sessions_with_glicd(account_id, sessions)
        else:
            # No session ids to look up; skip the API round trip.
            sessions_with_gclid = pd.DataFrame()
        # Guarantee both columns so the CSV has a header and the merge a key.
        sessions_with_gclid = sessions_with_gclid.reindex(columns=session_columns)
        sessions_with_gclid.to_csv(sessions_file)
    # Also drops the index column that a CSV round trip adds.
    sessions_with_gclid = sessions_with_gclid.reindex(columns=session_columns)
    conversions = pd.merge(conversions, sessions_with_gclid, how="left", on="sessionId")

    # --- monthly counts per goal ----------------------------------------------
    for goal in goals:
        conversionEvents = goal["conversionEvents"]
        print(f"... conversionEvents: {goal['id']}", conversionEvents)
        conv_with_events = conversions[conversions["name"].isin(conversionEvents)]
        conv_with_events_grouped = (
            conv_with_events.groupby("month")["sessionId"]
            .nunique()
            .reset_index()
            .rename(columns={"sessionId": "conv_count"})
        )
        conv_with_events_grouped["goal"] = goal["id"]
        # Store the full list of event names in every row of the "name" column.
        conv_with_events_grouped["name"] = None
        conv_with_events_grouped["name"] = conv_with_events_grouped["name"].apply(lambda x: conversionEvents)
        conv_with_events_grouped["account"] = account_name
        conversions_with_gclid = conv_with_events[conv_with_events["campaign.gclid"].notna()]
        conversions_with_gclid = (
            conversions_with_gclid.groupby("month")["sessionId"]
            .nunique()
            .reset_index()
            .rename(columns={"sessionId": "conv_count_with_gclid"})
        )
        conv_with_events_grouped = pd.merge(conv_with_events_grouped, conversions_with_gclid, how="left", on="month")
        res = pd.concat([res, conv_with_events_grouped])
        # Keeps the accumulator free of repeated rows when the cell is re-run.
        res = res.drop_duplicates(subset=["month","account","goal","conv_count","conv_count_with_gclid"])
    res.to_csv(f"{path_to_save}conversions_count_{start_date}_{end_date}.csv")

In [19]:
# Show the accumulated result frame as the output of this cell.
res