In [2]:
# Change the working directory to the repository checkout.
# NOTE(review): presumably needed so the project-local `general_functions` imports resolve — confirm.
# NOTE(review): absolute, machine-specific path; this cell only works on the author's machine.
cd /Users/karolinegriesbach/Documents/Innkeepr/Git/evaluation-and-execution-scripts/

# Rosental CRM Analyse
Hintergrund: Im Juni wurde eine erfolgreiche und sehr performante CRM Audience genutzt

Audience: kann mit Hilfe einer E-Mail-Liste nachgebaut werden

Kampagne: ☀️🟢🟡#DACH // CBO // Hot Prospects // SS25 // 06.06.2025
- #0001 // UGC // CA: Hot Prospects (New Customers) + LAL 5% // 06.06.2025
- #0002 // Static + LW // CA: Hot Prospects (New Customers) + LAL 5% // 06.06.2025

In [3]:
import ast
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from general_functions.return_account_ids import return_account_ids
from general_functions.constants import return_api_url
from general_functions.call_api_with_account_id import call_api_with_accountId, send_to_innkeepr_api_paginated

In [4]:
# Folder that holds the CSV inputs; the parquet file and all figures are written here as well.
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
path_to_data = "/Users/karolinegriesbach/Documents/Innkeepr/Git/evaluation-and-execution-scripts/SprintStories/PRD-2516-Rosental-CRM-Analyse/"
api_url = return_api_url()

# Look up the id of the account named "Rosental". The candidate list is kept under its own
# name so that `account_id` only ever holds the final id, and a missing account fails with
# an explicit message instead of an opaque IndexError from indexing an empty list.
accounts = return_account_ids()
rosental_account_ids = [acc["id"] for acc in accounts if acc["name"] == "Rosental"]
if not rosental_account_ids:
    raise ValueError('No account named "Rosental" was returned by return_account_ids()')
account_id = rosental_account_ids[0]

# Check if Campaign exists in one of the audiences

In [5]:
# Query the audiences endpoint for the selected account with an empty filter.
# The `logging` module itself is handed over as the logger argument.
audiences = send_to_innkeepr_api_paginated(f"{api_url}/audiences/query", account_id, {}, logging)

In [6]:
# Gather the treatment ids referenced by any audience config; the set removes duplicates.
treatments = list(
    {
        treatment_id
        for audience in audiences
        for treatment_id in audience["config"]["treatments"]
    }
)
len(treatments)

In [7]:
# Resolve the treatment ids collected from the audiences into full treatment records.
treatments = send_to_innkeepr_api_paginated(
    f"{api_url}/treatments/query",
    account_id,
    {"id": treatments},
    logging,
)
treatments_df = pd.json_normalize(treatments)

# Keep treatments whose campaign name mentions "Prospect".
# na=False: rows without a campaign name would otherwise produce NaN in the mask, and pandas
# refuses to filter with a boolean mask that contains NaN.
is_prospect_campaign = treatments_df["relates_to.campaign.name"].str.contains("Prospect", na=False)
valid_treatment = treatments_df[is_prospect_campaign]

# Only the "no matching treatment" path is implemented; a match stops the notebook on purpose.
if len(valid_treatment) == 0:
    print("No valid treatments found in audiences. So use conversion model")
else:
    raise ValueError("Case is not defined")

# Load Data

In [8]:
# Read the e-mail list and drop fully identical rows.
# NOTE(review): the preview below shows real e-mail addresses — clear the output before sharing.
emails = pd.read_csv(f"{path_to_data}Rosental Liste Juni 2025.csv")
emails = emails.drop_duplicates()
print(emails.shape)
emails.head()

In [9]:
# Persist the de-duplicated e-mail list as a parquet file in the data folder.
# NOTE(review): the file contains e-mail addresses — make sure it is not committed or shared.
emails.to_parquet(f"{path_to_data}emails.parquet")

In [10]:
# Load the conversion-probability export and show its size plus a short preview.
conversion_probs = pd.read_csv(
    f"{path_to_data}MVP 2516 Rosental CRM Analyse v2.csv"
)
print(conversion_probs.shape)
conversion_probs.head()

# Analyze Conversion Probabilities

In [11]:
def transform_json_emails_to_email(entry):
    """Extract the e-mail ids from one serialized ``email_externalIds`` value.

    Parameters
    ----------
    entry : str | list | float | None
        Python-literal text of a list of dicts (e.g. ``"[{'id': 'a@b.de'}]"``),
        an already parsed list, or a missing value (``None`` / NaN).

    Returns
    -------
    list | None
        ``[]`` for a missing value, the list of ``"id"`` values when at least one
        was found, otherwise ``None``.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when a string is not a valid Python literal.
    """
    # NaN never compares equal to anything (not even to np.nan), so missing values are
    # recognised through their string form; None is treated as missing as well.
    if entry is None or str(entry) == "nan":
        return []
    # Only text needs parsing; literal_eval rejects objects that are already parsed.
    if isinstance(entry, str):
        entry = ast.literal_eval(entry)
    if not isinstance(entry, list):
        return None
    # Ignore list elements that are not dicts instead of failing on a missing .keys().
    email = [item["id"] for item in entry if isinstance(item, dict) and "id" in item]
    return email or None

In [12]:
# Parse the serialized id column, then give every extracted e-mail its own row.
conversion_probs["email"] = conversion_probs["email_externalIds"].apply(transform_json_emails_to_email)
conversion_probs = (
    conversion_probs
    .explode("email")
    .drop(columns=["email_externalIds"])
    .reset_index(drop=True)
)
# True for rows whose e-mail occurs in the "Email" column of the loaded list.
conversion_probs["rosental_email"] = conversion_probs["email"].isin(emails["Email"])
conversion_probs

In [13]:
# Number of distinct e-mails; nunique() leaves missing values out by default.
conversion_probs["email"].nunique()

In [14]:
# One row per e-mail, then count how many fall into each group (missing flags included).
(
    conversion_probs
    .drop_duplicates(subset=["email"])["rosental_email"]
    .value_counts(dropna=False)
)

In [15]:
# Boxplot of the probability per group, written to the data folder as PNG.
fig, ax = plt.subplots(figsize=(4, 4))
ax.set_title("Probabilities for visitors including Rosental Emails (True) vs. Others (False)")
sns.boxplot(data=conversion_probs, x="rosental_email", y="probability", ax=ax)
ax.grid(True)
fig.tight_layout()
fig.savefig(f"{path_to_data}conversion_probabilities_boxplot.png")

In [16]:
# Summary statistics of the probability per group, transposed so each group is a column.
(
    conversion_probs
    .groupby(by=["rosental_email"])["probability"]
    .describe()
    .reset_index()
    .T
)

In [17]:
# Summary statistics of the probability per group and date, ordered by date.
conversions_by_date_and_group = (
    conversion_probs
    .groupby(by=["rosental_email", "date_conv"])["probability"]
    .describe()
    .reset_index()
    .sort_values("date_conv")
)
conversions_by_date_and_group

In [18]:
# Order the rows by date, then draw the probability over time with one line per group.
conversion_probs = conversion_probs.sort_values(by="date_conv")
fig, ax = plt.subplots(figsize=(4, 4))
ax.set_title("Probabilities for visitors including Rosental Emails (True) vs. Others (False)")
sns.lineplot(data=conversion_probs, x="date_conv", y="probability", hue="rosental_email", ax=ax)
# Cap the number of x ticks so the rotated date labels stay readable.
ax.xaxis.set_major_locator(plt.MaxNLocator(20))
ax.grid(True)
plt.xticks(rotation=90)
fig.tight_layout()
fig.savefig(f"{path_to_data}conversion_probabilities_by_date.png")

# Analyze Features

In [19]:
# Load the feature export into `features` (the file name suggests it covers June — TODO confirm).
features = pd.read_csv(f"{path_to_data}MVP 2516 Rosental CRM Analyse v2 features june.csv")

In [20]:
# Parse the serialized id column, then give every extracted e-mail its own row.
features["email"] = features["email_externalIds"].apply(transform_json_emails_to_email)
features = (
    features
    .explode("email")
    .drop(columns=["email_externalIds"])
    .reset_index(drop=True)
)
# True for rows whose e-mail occurs in the "Email" column of the loaded list.
features["rosental_email"] = features["email"].isin(emails["Email"])
features

In [21]:
# Date range covered by the feature rows, followed by the number of distinct e-mails
# (nunique() leaves missing values out by default).
print(f"features min date: {features['date'].min()} - max date {features['date'].max()}")
features["email"].nunique()

In [22]:
# Fetch the treatment records referenced by the feature rows.
# The result has to be bound to `treatments`: it was previously assigned to the misspelled
# name `tratments`, so the frame below was built from the stale `treatments` list of an
# earlier cell instead of from this response.
treatments = send_to_innkeepr_api_paginated(
    f"{api_url}/treatments/query",
    account_id,
    {"id": features["treatment"].dropna().unique().tolist()},
    logging,
)
treatments_df = pd.json_normalize(treatments)
treatments_df.head()

In [23]:
# Bring both join keys to pandas' string dtype, then attach the treatment name and the
# campaign name to every feature row (left join keeps rows without a matching treatment).
features["treatment"] = features["treatment"].astype("string")
treatments_df = (
    treatments_df
    .rename(columns={"id": "treatment"})
    .astype({"treatment": "string"})
)
features = features.merge(
    treatments_df[["treatment", "name", "relates_to.campaign.name"]],
    on="treatment",
    how="left",
)
features

In [24]:
# One row per e-mail, then count how many fall into each group (missing flags included).
(
    features
    .drop_duplicates(subset=["email"])["rosental_email"]
    .value_counts(dropna=False)
)

In [25]:
# Shorten every url to its first five "/"-separated parts and cut off the query string.
# The vectorised .str methods pass missing urls through as NaN, whereas calling
# x.split(...) inside apply() raises AttributeError on a NaN value.
features["url_split"] = (
    features["url"]
    .str.split("/")
    .str[:5]
    .str.join("/")
    .str.split("?")
    .str[0]
)
features[["url", "url_split"]]

In [26]:
# Count the urls per group and keep the ten most frequent ones of each group.
# NOTE(review): this counts the raw `url` column although a shortened `url_split` column is
# computed in the previous cell — confirm which one is intended.
url_value_counts = (
    features
    .groupby("rosental_email")["url"]
    .value_counts(dropna=False)
    .reset_index()
)
url_value_counts_top_10 = (
    url_value_counts
    .sort_values(by=["rosental_email", "count"], ascending=False)
    .reset_index(drop=True)
    .groupby("rosental_email")
    .head(10)
)

fig, ax = plt.subplots(figsize=(4, 4))
ax.set_title("Urls for visitors including Rosental Emails (True) vs. Others (False)")
sns.barplot(data=url_value_counts_top_10, x="url", y="count", hue="rosental_email", ax=ax)
ax.grid(True)
plt.xticks(rotation=90)
fig.tight_layout()
fig.savefig(f"{path_to_data}vc_urls.png")

In [27]:
# Count the landing pages per group, order them by frequency and keep the top ten per group.
landingpage_value_counts = (
    features
    .groupby("rosental_email")["landingpage"]
    .value_counts(dropna=False)
    .reset_index()
    .sort_values(by=["rosental_email", "count"], ascending=False)
    .reset_index(drop=True)
)
landingpage_value_counts_top10 = landingpage_value_counts.groupby("rosental_email").head(10)

fig, ax = plt.subplots(figsize=(4, 4))
ax.set_title("Landingpage for visitors including Rosental Emails (True) vs. Others (False)")
sns.barplot(data=landingpage_value_counts_top10, x="landingpage", y="count", hue="rosental_email", ax=ax)
ax.grid(True)
plt.xticks(rotation=90)
fig.tight_layout()
fig.savefig(f"{path_to_data}vc_landingpage.png")

In [28]:
# Count the session browsers per group, order them by frequency and keep the top ten per group.
session_browser_value_counts = (
    features
    .groupby("rosental_email")["session_browser"]
    .value_counts(dropna=False)
    .reset_index()
    .sort_values(by=["rosental_email", "count"], ascending=False)
    .reset_index(drop=True)
)
session_browser_value_counts_top10 = session_browser_value_counts.groupby("rosental_email").head(10)

fig, ax = plt.subplots(figsize=(4, 4))
# The title names the plotted column; it used to read "Landingpage", copied from the
# landing page cell, which mislabelled the saved figure.
ax.set_title("Session browser for visitors including Rosental Emails (True) vs. Others (False)")
sns.barplot(data=session_browser_value_counts_top10, x="session_browser", y="count", hue="rosental_email", ax=ax)
ax.grid(True)
plt.xticks(rotation=90)
fig.tight_layout()
fig.savefig(f"{path_to_data}vc_session_browser.png")
session_browser_value_counts_top10

In [29]:
# How often each treatment name occurs per group (missing names are counted too).
treatment_value_counts = (
    features
    .groupby("rosental_email")["name"]
    .value_counts(dropna=False)
    .reset_index()
)
treatment_value_counts

In [30]:
# Count the treatment names per group, order them by frequency and keep the top ten per group.
treatment_value_counts = (
    features
    .groupby("rosental_email")["name"]
    .value_counts(dropna=False)
    .reset_index()
    .sort_values(by=["rosental_email", "count"], ascending=False)
    .reset_index(drop=True)
)
treatment_value_counts_top10 = treatment_value_counts.groupby("rosental_email").head(10)

fig, ax = plt.subplots(figsize=(4, 4))
ax.set_title("Treatments for visitors including Rosental Emails (True) vs. Others (False)")
sns.barplot(data=treatment_value_counts_top10, x="name", y="count", hue="rosental_email", ax=ax)
ax.grid(True)
plt.xticks(rotation=90)
fig.tight_layout()
fig.savefig(f"{path_to_data}vc_treatment.png")
treatment_value_counts_top10

In [31]:
# One boxplot per numeric feature, comparing the two groups; each figure is saved as
# boxplot_<column>.png in the data folder.
for col in [
    "session_duration_in_s",
    "days_between_sessions",
    "pages_visited_per_user_cumulated",
    "pages_visited_per_user",
    "pages_visited_per_session_cumulated",
    "pages_visited_per_session",
]:
    fig, ax = plt.subplots(figsize=(4, 4))
    ax.set_title(f"{col} for visitors including Rosental Emails (True) vs. Others (False)")
    # Plot the column of the current iteration. The y axis was hard-coded to
    # "session_duration_in_s", so every saved figure showed the same data under a
    # different title and file name.
    sns.boxplot(data=features, x="rosental_email", y=col, hue="rosental_email", ax=ax)
    ax.grid(True)
    plt.xticks(rotation=90)
    fig.tight_layout()
    # Write the file before showing, so the saved image does not depend on how the
    # backend treats the figure after plt.show().
    fig.savefig(f"{path_to_data}boxplot_{col}.png")
    plt.show()
    # Release the figure; otherwise every iteration keeps one more figure in memory.
    plt.close(fig)