In [None]:
import os
import requests
from dotenv import load_dotenv
import pandas as pd

# Load environment variables from .env file
load_dotenv()

SUPABASE_KEY = os.getenv("SUPABASE_KEY")

url = f"https://iukxcgvmzjfelwfrpkyi.supabase.co/rest/v1/analytics?apikey={SUPABASE_KEY}&select=*"
headers = {
    "apikey": SUPABASE_KEY,
    "Authorization": f"Bearer {SUPABASE_KEY}",
}

response = requests.get(url, headers=headers)


# Create a dataframe from the JSON response payload
data = response.json()


In [22]:
df = pd.DataFrame(data)
blacklisted_user_ids = ["390c0190-33a3-4029-ae09-687bff0b77d6"]
should_delete = df[df["type"] == "delete-data"]
blacklisted_user_ids.extend(should_delete["user_id"].tolist())

# Filter out rows where user_id is in the blacklisted_user_ids array
df = df[~df["user_id"].isin(blacklisted_user_ids)]


def get_top_domain(domain: str) -> str:
    if domain.startswith("www") or domain.startswith("ww2"):
        domain = domain[4:]
    
    d = domain.split("/")[0]

    return ".".join(d.split(".")[-3:]) if ".co.uk" in d else ".".join(d.split(".")[-2:])

# Add column that says if the user has a strategy or not
df["domain"] = df["url"].apply(lambda x: get_top_domain(x) if x != None else None)
# Define an empty array for blacklisted user ids


# AFTER:  2025-04-13 02:00
# BEFORE: 2025-04-26 10:00
df = df[
    (df["created_at"] <= "2025-04-13 02:00") | (df["created_at"] >= "2025-04-26 10:00")
]

In [23]:
# TO CSV
df.to_csv("analytics.csv", index=False)


In [24]:
# Create a new data frame, where it only includes a single sample of each type of event. The event is the "type" column.
df_unique = df.drop_duplicates(subset=["type"])
df_unique


Unnamed: 0,id,type,url,payload,user_id,session_id,received_at,created_at,domain
0,16926,time-spent,www.amazon.com/Amazon-Brand-Decorative-Decorat...,"{""duration"":738}",98ec858e-c25a-4c2a-a588-cfe9cf8a1fb9,b1d23a46-7793-48b7-bfef-02d2bf61309c,2025-03-14T00:03:23.770546+00:00,2025-03-14T00:03:18.47+00:00,amazon.com
2,16928,uninstall,www.lessextension.com/goodbye,,98ec858e-c25a-4c2a-a588-cfe9cf8a1fb9,none,2025-03-14T00:03:34.022325+00:00,2025-03-14T00:03:29.737+00:00,lessextension.com
10,16944,on-onboarding,www.lessextension.com/onboarding,,none,none,2025-03-14T16:00:23.615042+00:00,2025-03-14T16:00:22.709+00:00,lessextension.com
21,16955,active,kcgblchgejkpnemehaojecgbamdiacml/popup.html,true,4dd84d7b-578b-4c4a-9ddd-b99c02515df7,54cffff4-f3e4-45f6-945d-0dccff44a05a,2025-03-15T11:49:42.544898+00:00,2025-03-15T11:49:41.641+00:00,kcgblchgejkpnemehaojecgbamdiacml
23,16957,page-view,www.zalando.dk/herrer-home/,,4dd84d7b-578b-4c4a-9ddd-b99c02515df7,54cffff4-f3e4-45f6-945d-0dccff44a05a,2025-03-15T11:50:03.509868+00:00,2025-03-15T11:50:03.026+00:00,zalando.dk
33,16967,add-to-cart,www.zalando.dk/pier-one-fleecetroejer-pine-gro...,,4dd84d7b-578b-4c4a-9ddd-b99c02515df7,54cffff4-f3e4-45f6-945d-0dccff44a05a,2025-03-15T11:50:12.717857+00:00,2025-03-15T11:50:12.331+00:00,zalando.dk
37,17191,from-directs,www.lessextension.com/,"""anticonsumption""",less-website,none,2025-03-17T13:20:50.209908+00:00,2025-03-17T13:20:49.723+00:00,lessextension.com
40,17194,from-directs-cta,www.lessextension.com/,"""chrome_extension""",less-website,none,2025-03-17T13:41:45.949022+00:00,2025-03-17T13:41:45.307+00:00,lessextension.com
92,16979,cancel,www.zalando.dk/checkout/confirm,,4dd84d7b-578b-4c4a-9ddd-b99c02515df7,54cffff4-f3e4-45f6-945d-0dccff44a05a,2025-03-15T11:50:32.309562+00:00,2025-03-15T11:50:31.932+00:00,zalando.dk
99,16986,open-popup,kcgblchgejkpnemehaojecgbamdiacml/popup.html,,ffeed042-eeab-47b4-b58b-c26d9c1fae3e,3b906e20-2bba-4ddc-b5f4-1c2881187f85,2025-03-16T08:53:08.700911+00:00,2025-03-16T08:53:08.335+00:00,kcgblchgejkpnemehaojecgbamdiacml


In [25]:
# count different types of events
event_counts = df["type"].value_counts()
print(event_counts)

type
time-spent                    9004
page-view                      900
from-directs                   494
on-onboarding                  264
from-directs-cta                87
open-popup                      62
add-to-cart                     45
uninstall                       34
active                          28
welcome-modal-seen              28
enforce_wait_modal_shown        21
enforce_wait_info_expanded      10
enforce_wait_canceled            9
open-options                     5
place-order                      5
questionary-popup                4
cancel                           1
Name: count, dtype: int64


In [9]:
import re

# Count unique user IDs
# Define a regex pattern for UUIDv4
uuidv4_pattern = re.compile(r'^[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$', re.IGNORECASE)

# Filter user_id's that match the UUIDv4 pattern
uuidv4 = df[df["user_id"].apply(lambda x: bool(uuidv4_pattern.match(x)))]
unique_users = uuidv4["user_id"].nunique()

uninstalled = df[df["type"] == "uninstall"]
unique_uninstalled = uninstalled["user_id"].nunique()
unique_installed = unique_users - unique_uninstalled

# look at the last active event for each unique user. If the last event has a payload of "false", this means they've deactivated it.
# I'd like to know how many of the current installed users have deactivated the extension.
last_active = df[df["type"] == "active"]
last_active = last_active.drop_duplicates(subset=["user_id"], keep="last")
last_active = last_active[last_active["payload"] == "false"]
unique_deactivated = last_active["user_id"].nunique()
totally_active = unique_installed - unique_deactivated
on_onboarding = df[(df["type"] == "on-onboarding") & (df["payload"].apply(lambda x: x != '""'))]

print("Installations:", len(on_onboarding), "\nUser activity seen from", unique_users, "\nuninstalled:", unique_uninstalled, "\ndeactivated:", unique_deactivated, "\nTotal registered active users:", totally_active)

Installations: 59 
User activity seen from 76 
uninstalled: 38 
deactivated: 7 
Total registered active users: 31
