In [2]:
import os
from collections import Counter
from time import sleep

from flask import Flask, jsonify
from supabase import create_client, Client

# Flask application object for this module.
app = Flask(__name__)

# Supabase credentials come from the environment; each is None when unset.
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Single client instance shared by every query in this notebook.
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

# Table queried for per-user event rows (event_type, event_time, amplitude_id).
TABLE_NAME = "v2_federato_amplitude_data"

In [9]:
def get_retention_events(user_id):
    """Return up to five event types this user most reliably comes back to.

    Looks up the user's ``amplitude_id`` in ``user_table``, loads that user's
    events from ``TABLE_NAME`` ordered by ``event_time`` ascending, and scores
    every event type that occurs at least twice. An occurrence counts as a
    "return" when the same event type occurs again later, so a type seen
    ``n`` times has ``n - 1`` returns and a return rate of ``(n - 1) / n``.

    Args:
        user_id: Value matched against ``user_table.user_id``.

    Returns:
        A list of at most five ``{"event": ..., "return_rate": ...}`` dicts,
        sorted by return rate descending; ties keep first-seen order.
        Returns ``[]`` when the user row or the events are missing, and also
        when any exception occurs (the error is printed, not raised).
    """
    try:
        # Resolve the Amplitude identifier stored on the user's row.
        user_response = (
            supabase.table("user_table")
            .select("amplitude_id")
            .eq("user_id", user_id)
            .execute()
        )
        if not user_response.data:
            # Explicit guard instead of an opaque IndexError on data[0].
            print(f"No user_table row found for user {user_id}")
            return []
        amplitude_id = user_response.data[0]["amplitude_id"]

        # Chronological order only matters for tie-breaking below: the Counter
        # remembers event types in the order they were first seen.
        # NOTE(review): this is a single unpaginated request; if the API caps
        # rows per response, very active users may be scored on a truncated
        # history - confirm against the project's row limit.
        events_response = (
            supabase.table(TABLE_NAME)
            .select("event_type, event_time")
            .eq("amplitude_id", amplitude_id)
            .order("event_time", desc=False)
            .execute()
        )
        if not events_response.data:
            return []

        event_counts = Counter(row["event_type"] for row in events_response.data)

        # Every occurrence except the last one of its type is followed by a
        # repeat, so returns == count - 1. This replaces the quadratic
        # "slice the remaining events and search them" scan with one pass.
        scored = [
            (event, (count - 1) / count)
            for event, count in event_counts.items()
            if count >= 2  # a type seen once can never have a return
        ]

        # sorted() is stable, so equal rates stay in first-seen order.
        top_events = sorted(scored, key=lambda pair: pair[1], reverse=True)[:5]

        return [{"event": event, "return_rate": rate} for event, rate in top_events]

    except Exception as e:
        # Best-effort by design: report and fall back to an empty result.
        # NOTE(review): callers cannot tell this apart from "no events".
        print(f"Error calculating retention events for user {user_id}: {e}")
        return []

# Update all users in the user_table with their top retention events
def update_all_users_retention_events(page_size=1000):
    """Ensure the ``top_retention_events`` column exists, then fill it for every user.

    First sends a DDL statement through the ``execute_sql`` RPC to add a JSONB
    ``top_retention_events`` column to ``user_table`` when it is missing, then
    walks ``user_table`` page by page (ordered by ``user_id``) and stores the
    result of ``get_retention_events`` on each row.

    The user list is paged because a single unbounded ``select`` can be
    truncated by the API's per-request row limit, which would silently skip
    the remaining users.

    Args:
        page_size: Number of users requested per page. Must be at least 1.

    Returns:
        None. Progress and errors are printed; any exception stops the run
        and is reported instead of being raised.
    """
    try:
        if page_size < 1:
            raise ValueError("page_size must be at least 1")

        # Idempotent DDL: only adds the column when information_schema does
        # not already list it.
        # NOTE(review): the existence check is not restricted to a schema, so
        # a same-named table elsewhere could mask a missing column - confirm.
        add_column_sql = """
        DO $$ 
        BEGIN 
            IF NOT EXISTS (
                SELECT 1 
                FROM information_schema.columns 
                WHERE table_name = 'user_table' 
                AND column_name = 'top_retention_events'
            ) THEN 
                ALTER TABLE user_table ADD COLUMN top_retention_events JSONB;
            END IF;
        END $$;
        """

        # Execute the SQL directly through a query
        supabase.table("user_table").select("*").limit(1).execute()  # Ensure connection
        # NOTE(review): presumably `execute_sql` is a database function defined
        # in this project; it is not created anywhere in this file.
        supabase.postgrest.schema("public").rpc("execute_sql", {"query": add_column_sql}).execute()
        print("Added top_retention_events column if it didn't exist")

        # Offset paging is safe here: the loop only writes
        # top_retention_events, never user_id, so the ordering is stable.
        offset = 0
        while True:
            users_response = (
                supabase.table("user_table")
                .select("user_id")
                .order("user_id")
                .range(offset, offset + page_size - 1)  # inclusive bounds
                .execute()
            )
            batch = users_response.data
            if not batch:
                break

            for user in batch:
                user_id = user["user_id"]
                top_events = get_retention_events(user_id)

                # Update the user's record with their top retention events
                supabase.table("user_table")\
                    .update({"top_retention_events": top_events})\
                    .eq("user_id", user_id)\
                    .execute()

                print(f"Updated retention events for user {user_id}")

            # Advance by what was actually returned so a server-side cap that
            # is smaller than page_size cannot cause users to be skipped.
            offset += len(batch)

        print("Completed updating all users' retention events")

    except Exception as e:
        print(f"Error updating retention events: {e}")

# Run the backfill now. This issues live Supabase requests and rewrites
# top_retention_events for the users it processes.
update_all_users_retention_events()

Added top_retention_events column if it didn't exist
Updated retention events for user 1
Updated retention events for user 2
Updated retention events for user 3
Updated retention events for user 4
Updated retention events for user 5
Updated retention events for user 6
Updated retention events for user 7
Updated retention events for user 8
Updated retention events for user 9
Updated retention events for user 10
Updated retention events for user 11
Updated retention events for user 12
Updated retention events for user 13
Updated retention events for user 14
Updated retention events for user 15
Updated retention events for user 16
Updated retention events for user 17
Updated retention events for user 18
Updated retention events for user 19
Updated retention events for user 20
Updated retention events for user 21
Updated retention events for user 22
Updated retention events for user 23
Updated retention events for user 24
Updated retention events for user 25
Updated retention events for us

In [7]:
def get_retention_events(user_id):
    """Return up to five event types this user most reliably comes back to.

    Looks up the user's ``amplitude_id`` in ``user_table``, loads that user's
    events from ``TABLE_NAME`` ordered by ``event_time`` ascending, and scores
    every event type that occurs at least twice. An occurrence counts as a
    "return" when the same event type occurs again later, so a type seen
    ``n`` times has ``n - 1`` returns and a return rate of ``(n - 1) / n``.

    Args:
        user_id: Value matched against ``user_table.user_id``.

    Returns:
        A list of at most five ``{"event": ..., "return_rate": ...}`` dicts,
        sorted by return rate descending; ties keep first-seen order.
        Returns ``[]`` when the user row or the events are missing, and also
        when any exception occurs (the error is printed, not raised).
    """
    try:
        # Resolve the Amplitude identifier stored on the user's row.
        user_response = (
            supabase.table("user_table")
            .select("amplitude_id")
            .eq("user_id", user_id)
            .execute()
        )
        if not user_response.data:
            # Explicit guard instead of an opaque IndexError on data[0].
            print(f"No user_table row found for user {user_id}")
            return []
        amplitude_id = user_response.data[0]["amplitude_id"]

        # Chronological order only matters for tie-breaking below: the Counter
        # remembers event types in the order they were first seen.
        # NOTE(review): this is a single unpaginated request; if the API caps
        # rows per response, very active users may be scored on a truncated
        # history - confirm against the project's row limit.
        events_response = (
            supabase.table(TABLE_NAME)
            .select("event_type, event_time")
            .eq("amplitude_id", amplitude_id)
            .order("event_time", desc=False)
            .execute()
        )
        if not events_response.data:
            return []

        event_counts = Counter(row["event_type"] for row in events_response.data)

        # Every occurrence except the last one of its type is followed by a
        # repeat, so returns == count - 1. This replaces the quadratic
        # "slice the remaining events and search them" scan with one pass.
        scored = [
            (event, (count - 1) / count)
            for event, count in event_counts.items()
            if count >= 2  # a type seen once can never have a return
        ]

        # sorted() is stable, so equal rates stay in first-seen order.
        top_events = sorted(scored, key=lambda pair: pair[1], reverse=True)[:5]

        return [{"event": event, "return_rate": rate} for event, rate in top_events]

    except Exception as e:
        # Best-effort by design: report and fall back to an empty result.
        # NOTE(review): callers cannot tell this apart from "no events".
        print(f"Error calculating retention events for user {user_id}: {e}")
        return []

# Update all users in the user_table with their top retention events
def update_all_users_retention_events(start_after_user_id=14200, page_size=1000,
                                      max_consecutive_failures=5):
    """Recompute and store ``top_retention_events`` for users in batches.

    Pages through ``user_table`` in ascending ``user_id`` order, starting
    after ``start_after_user_id``, and writes the result of
    ``get_retention_events`` to each row. A failed batch is retried from the
    last user that was written successfully.

    Retries are bounded: previously a persistent failure (for example one
    user whose update always fails) was retried forever. Now the run stops
    once ``max_consecutive_failures`` attempts in a row make no progress.

    Args:
        start_after_user_id: Only rows with ``user_id`` greater than this are
            processed. Defaults to 14200, the previously hard-coded start.
        page_size: Maximum number of users fetched per batch.
        max_consecutive_failures: Failed attempts allowed in a row before the
            run gives up.

    Returns:
        None. Progress and errors are printed; errors are not raised to the
        caller.
    """
    try:
        last_user_id = start_after_user_id
        consecutive_failures = 0

        while True:
            try:
                # Get next batch of users (keyset paging on user_id)
                users_response = (
                    supabase.table("user_table")
                    .select("user_id")
                    .gt("user_id", last_user_id)
                    .order("user_id")
                    .limit(page_size)
                    .execute()
                )

                # If no more users, break
                if not users_response.data:
                    break

                # Process each user in this batch
                for user in users_response.data:
                    user_id = user["user_id"]
                    top_events = get_retention_events(user_id)

                    # Update the user's record with their top retention events
                    supabase.table("user_table")\
                        .update({"top_retention_events": top_events})\
                        .eq("user_id", user_id)\
                        .execute()

                    print(f"Updated retention events for user {user_id}")
                    # Advancing the cursor per user means a retry resumes at
                    # the first user that has not been written yet.
                    last_user_id = user_id
                    consecutive_failures = 0  # progress was made

                print(f"Completed batch up to user {last_user_id}")
                sleep(1)

            except Exception as batch_error:
                consecutive_failures += 1
                print(f"Batch error: {batch_error}")
                if consecutive_failures >= max_consecutive_failures:
                    print(
                        f"Giving up after {consecutive_failures} consecutive failed "
                        f"attempts; last updated user was {last_user_id}"
                    )
                    raise  # reported by the outer handler below
                sleep(5)
                continue

        print("Completed updating all users' retention events")

    except Exception as e:
        print(f"Error updating retention events: {e}")

# Run the batched backfill now (live Supabase reads and writes). As written,
# it starts with the first user_id greater than 14200.
update_all_users_retention_events()

Updated retention events for user 14201
Updated retention events for user 14202
Updated retention events for user 14203
Updated retention events for user 14204
Updated retention events for user 14205
Updated retention events for user 14206
Updated retention events for user 14207
Updated retention events for user 14208
Updated retention events for user 14209
Updated retention events for user 14210
Updated retention events for user 14211
Updated retention events for user 14212
Updated retention events for user 14213
Updated retention events for user 14214
Updated retention events for user 14215
Updated retention events for user 14216
Updated retention events for user 14217
Updated retention events for user 14218
Updated retention events for user 14219
Updated retention events for user 14220
Updated retention events for user 14221
Updated retention events for user 14222
Updated retention events for user 14223
Updated retention events for user 14224
Updated retention events for user 14225


In [4]:
# Load every row's stored top-events list so the most widespread events
# across all users can be ranked.
query_response = supabase.table("user_table_refined_v2").select("top_events").execute()

# Rows whose top_events is None or empty carry no information; drop them once.
rows_with_events = [row for row in query_response.data if row["top_events"]]

# Denominator for the percentages: users that have at least one top event.
total_users = len(rows_with_events)

# Tally how many users list each event; the set makes an event count at most
# once per user even if it appears several times in that user's list.
event_counts = {}
for row in rows_with_events:
    for event_name in set(entry["event"] for entry in row["top_events"]):
        event_counts[event_name] = event_counts.get(event_name, 0) + 1

# Rank by user count, highest first (stable sort), and keep the first ten.
ranked_events = list(event_counts.items())
ranked_events.sort(key=lambda pair: pair[1], reverse=True)
top_10_events = ranked_events[:10]

# Report each event with its user count and share of counted users.
print("\nTop 10 Most Common Events Overall:")
print("--------------------------------")
for event, count in top_10_events:
    if total_users > 0:
        percentage = (count / total_users) * 100
    else:
        percentage = 0
    print(f"Event: {event}")
    print(f"Number of Users: {count}")
    print(f"Percentage of Users: {percentage:.1f}%")
    print("--------------------------------")



Top 10 Most Common Events Overall:
--------------------------------
Event: application-window-opened
Number of Users: 1011
Percentage of Users: 73.0%
--------------------------------
Event: account-lines:::view
Number of Users: 579
Percentage of Users: 41.8%
--------------------------------
Event: account:::view
Number of Users: 526
Percentage of Users: 38.0%
--------------------------------
Event: dashboard:my-book::view
Number of Users: 427
Percentage of Users: 30.8%
--------------------------------
Event: account-lines::widget:render
Number of Users: 322
Percentage of Users: 23.2%
--------------------------------
Event: agency-dashboard:::view
Number of Users: 259
Percentage of Users: 18.7%
--------------------------------
Event: dashboard:my-book:widget:render
Number of Users: 247
Percentage of Users: 17.8%
--------------------------------
Event: dashboard:my-book:configurable-table:render
Number of Users: 246
Percentage of Users: 17.8%
--------------------------------
Event: acco