In [11]:
import os
from flask import Flask, jsonify
from supabase import create_client, Client

# Flask application object for this module.
app = Flask(__name__)

# Supabase connection settings are read from the process environment.
# Either value is None when its variable is not set.
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Shared Supabase client used by every query in this notebook.
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

# Table holding the per-user event rows (queried by amplitude_id,
# event_type and event_time below).
TABLE_NAME = "v2_federato_amplitude_data"

In [9]:
def get_retention_events(user_id):
    """Calculate the top 5 retention events for a given user.

    The user's ``amplitude_id`` is looked up in ``user_table`` and the rows of
    ``TABLE_NAME`` with that id are read in ascending ``event_time`` order.
    An occurrence of an event type counts as a "return" when the same type
    occurs again later in the sequence, so an event type seen ``n`` times has
    ``n - 1`` returns and a return rate of ``(n - 1) / n``.  Event types seen
    only once have no returns and are left out of the result.

    Because ``(n - 1) / n`` grows with ``n``, this ranking is equivalent to
    ranking the repeated event types by how often they occur.

    Args:
        user_id: Value matched against ``user_table.user_id``.

    Returns:
        A list of at most five ``{"event": ..., "return_rate": ...}`` dicts,
        highest rate first; ties keep first-occurrence order.  An empty list
        is returned when the user is not found, has no events, or when any
        exception is raised (the error is printed, not propagated).
    """
    try:
        # Get user's amplitude_id
        user_response = supabase.table("user_table").select("amplitude_id").eq("user_id", user_id).execute()
        if not user_response.data:
            # Handle the unknown-user case explicitly instead of relying on
            # an IndexError from data[0] being swallowed below.
            print(f"No user_table row found for user {user_id}")
            return []
        amplitude_id = user_response.data[0]["amplitude_id"]

        # Get all events for this user, ordered by time
        # NOTE(review): the API may cap the number of rows returned by one
        # request; if a user can have more events than that cap, the counts
        # below are computed from a truncated list -- confirm and paginate.
        events_response = supabase.table(TABLE_NAME)\
            .select("event_type, event_time")\
            .eq("amplitude_id", amplitude_id)\
            .order("event_time", desc=False)\
            .execute()

        if not events_response.data:
            return []

        # Count occurrences per event type in a single pass.  Dict insertion
        # order records the first time each type was seen, which is what
        # decides the order of equal-rate events after the stable sort.
        occurrences = {}
        for row in events_response.data:
            event_type = row["event_type"]
            occurrences[event_type] = occurrences.get(event_type, 0) + 1

        # Every occurrence except the last one is followed by another
        # occurrence of the same type, hence returns == count - 1.
        return_rates = [
            (event_type, (count - 1) / count)
            for event_type, count in occurrences.items()
            if count >= 2  # a single occurrence can never be a return
        ]
        return_rates.sort(key=lambda pair: pair[1], reverse=True)

        # Format results
        return [
            {"event": event_type, "return_rate": rate}
            for event_type, rate in return_rates[:5]
        ]

    except Exception as e:
        print(f"Error calculating retention events for user {user_id}: {e}")
        return []

# Update all users in the user_table with their top retention events
def update_all_users_retention_events():
    """Ensure ``user_table.top_retention_events`` exists, then fill it for every user.

    Steps:
      1. Calls the ``execute_sql`` RPC with an ``ALTER TABLE ... ADD COLUMN IF
         NOT EXISTS`` statement that adds the JSONB column when it is missing.
      2. Walks ``user_table`` in ascending ``user_id`` order, one page at a
         time, and stores ``get_retention_events(user_id)`` in each row.

    Progress is printed per user.  Any exception is printed and ends the run;
    nothing is raised to the caller and nothing is returned.
    """
    try:
        # ADD COLUMN IF NOT EXISTS is evaluated against the very table being
        # altered, unlike an information_schema lookup by bare table name,
        # which can match a same-named table in another schema.
        add_column_sql = (
            "ALTER TABLE user_table "
            "ADD COLUMN IF NOT EXISTS top_retention_events JSONB;"
        )

        # NOTE(review): "execute_sql" is not defined in this notebook;
        # presumably it is a database function created in the project.
        supabase.postgrest.schema("public").rpc("execute_sql", {"query": add_column_sql}).execute()
        print("Added top_retention_events column if it didn't exist")

        # A single unpaginated select can be truncated by the API's
        # per-request row cap, so fetch users page by page, always asking
        # for the rows after the last user_id already handled.
        page_size = 1000
        last_user_id = None

        while True:
            users_query = supabase.table("user_table").select("user_id")
            if last_user_id is not None:
                users_query = users_query.gt("user_id", last_user_id)
            users_response = users_query.order("user_id").limit(page_size).execute()

            # An empty page means every user has been processed.
            if not users_response.data:
                break

            # Process each user
            for user in users_response.data:
                user_id = user["user_id"]
                # get_retention_events returns [] both when a user has no
                # repeated events and when it swallowed an error, so a failed
                # lookup also stores [] here.
                top_events = get_retention_events(user_id)

                # Update the user's record with their top retention events
                supabase.table("user_table")\
                    .update({"top_retention_events": top_events})\
                    .eq("user_id", user_id)\
                    .execute()

                print(f"Updated retention events for user {user_id}")
                last_user_id = user_id

        print("Completed updating all users' retention events")

    except Exception as e:
        print(f"Error updating retention events: {e}")

# Execute the update. Calling this runs the whole job as soon as the cell
# executes: it issues Supabase requests and rewrites top_retention_events on
# user_table rows, so re-running the cell repeats those writes.
update_all_users_retention_events()

Added top_retention_events column if it didn't exist
Updated retention events for user 1
Updated retention events for user 2
Updated retention events for user 3
Updated retention events for user 4
Updated retention events for user 5
Updated retention events for user 6
Updated retention events for user 7
Updated retention events for user 8
Updated retention events for user 9
Updated retention events for user 10
Updated retention events for user 11
Updated retention events for user 12
Updated retention events for user 13
Updated retention events for user 14
Updated retention events for user 15
Updated retention events for user 16
Updated retention events for user 17
Updated retention events for user 18
Updated retention events for user 19
Updated retention events for user 20
Updated retention events for user 21
Updated retention events for user 22
Updated retention events for user 23
Updated retention events for user 24
Updated retention events for user 25
Updated retention events for us

In [13]:
def get_retention_events(user_id):
    """Calculate the top 5 retention events for a given user.

    The user's ``amplitude_id`` is looked up in ``user_table`` and the rows of
    ``TABLE_NAME`` with that id are read in ascending ``event_time`` order.
    An occurrence of an event type counts as a "return" when the same type
    occurs again later in the sequence, so an event type seen ``n`` times has
    ``n - 1`` returns and a return rate of ``(n - 1) / n``.  Event types seen
    only once have no returns and are left out of the result.

    Because ``(n - 1) / n`` grows with ``n``, this ranking is equivalent to
    ranking the repeated event types by how often they occur.

    NOTE: a function with this name is also defined in an earlier cell of
    this notebook; whichever cell ran last is the definition in effect.

    Args:
        user_id: Value matched against ``user_table.user_id``.

    Returns:
        A list of at most five ``{"event": ..., "return_rate": ...}`` dicts,
        highest rate first; ties keep first-occurrence order.  An empty list
        is returned when the user is not found, has no events, or when any
        exception is raised (the error is printed, not propagated).
    """
    try:
        # Get user's amplitude_id
        user_response = supabase.table("user_table").select("amplitude_id").eq("user_id", user_id).execute()
        if not user_response.data:
            # Handle the unknown-user case explicitly instead of relying on
            # an IndexError from data[0] being swallowed below.
            print(f"No user_table row found for user {user_id}")
            return []
        amplitude_id = user_response.data[0]["amplitude_id"]

        # Get all events for this user, ordered by time
        # NOTE(review): the API may cap the number of rows returned by one
        # request; if a user can have more events than that cap, the counts
        # below are computed from a truncated list -- confirm and paginate.
        events_response = supabase.table(TABLE_NAME)\
            .select("event_type, event_time")\
            .eq("amplitude_id", amplitude_id)\
            .order("event_time", desc=False)\
            .execute()

        if not events_response.data:
            return []

        # Count occurrences per event type in a single pass.  Dict insertion
        # order records the first time each type was seen, which is what
        # decides the order of equal-rate events after the stable sort.
        occurrences = {}
        for row in events_response.data:
            event_type = row["event_type"]
            occurrences[event_type] = occurrences.get(event_type, 0) + 1

        # Every occurrence except the last one is followed by another
        # occurrence of the same type, hence returns == count - 1.
        return_rates = [
            (event_type, (count - 1) / count)
            for event_type, count in occurrences.items()
            if count >= 2  # a single occurrence can never be a return
        ]
        return_rates.sort(key=lambda pair: pair[1], reverse=True)

        # Format results
        return [
            {"event": event_type, "return_rate": rate}
            for event_type, rate in return_rates[:5]
        ]

    except Exception as e:
        print(f"Error calculating retention events for user {user_id}: {e}")
        return []

# Update all users in the user_table with their top retention events
def update_all_users_retention_events(start_after_user_id=6300, page_size=1000):
    """Recompute ``top_retention_events`` for users after a given ``user_id``.

    Pages through ``user_table`` in ascending ``user_id`` order, each query
    asking for the rows after the last ``user_id`` already handled, and
    stores ``get_retention_events(user_id)`` in every row visited.

    NOTE: a function with this name is also defined in an earlier cell of
    this notebook; whichever cell ran last is the definition in effect.

    Args:
        start_after_user_id: Only users whose ``user_id`` is strictly greater
            than this value are processed.  The default of 6300 is the
            starting point that used to be hard-coded in the body
            (presumably a resume point from an interrupted run -- confirm),
            so a call without arguments does NOT touch users with
            ``user_id <= 6300``.  Pass a value below the smallest
            ``user_id`` to process every user.
        page_size: Maximum number of users requested per query.

    Progress is printed per user and per batch.  Any exception is printed
    and ends the run; nothing is raised to the caller and nothing is
    returned.
    """
    try:
        last_user_id = start_after_user_id

        while True:
            # Get next batch of users
            users_response = supabase.table("user_table")\
                .select("user_id")\
                .gt("user_id", last_user_id)\
                .order("user_id")\
                .limit(page_size)\
                .execute()

            # If no more users, break
            batch = users_response.data
            if not batch:
                break

            # Process each user in this batch
            for user in batch:
                user_id = user["user_id"]
                # get_retention_events returns [] both when a user has no
                # repeated events and when it swallowed an error, so a failed
                # lookup also stores [] here.
                top_events = get_retention_events(user_id)

                # Update the user's record with their top retention events
                supabase.table("user_table")\
                    .update({"top_retention_events": top_events})\
                    .eq("user_id", user_id)\
                    .execute()

                print(f"Updated retention events for user {user_id}")
                # Advance the cursor so the next query starts after this user.
                last_user_id = user_id

            print(f"Completed batch up to user {last_user_id}")

        print("Completed updating all users' retention events")

    except Exception as e:
        print(f"Error updating retention events: {e}")

# Execute the update. Calling this runs the batched job as soon as the cell
# executes: it issues Supabase requests and rewrites top_retention_events on
# user_table rows, so re-running the cell repeats those writes.
update_all_users_retention_events()

Updated retention events for user 3000
Updated retention events for user 3001
Updated retention events for user 3002
Updated retention events for user 3003
Updated retention events for user 3004
Updated retention events for user 3005
Updated retention events for user 3006
Updated retention events for user 3007
Updated retention events for user 3008
Updated retention events for user 3009
Updated retention events for user 3010
Updated retention events for user 3011
Updated retention events for user 3012
Updated retention events for user 3013
Updated retention events for user 3014
Updated retention events for user 3015
Updated retention events for user 3016
Updated retention events for user 3017
Updated retention events for user 3018
Updated retention events for user 3019
Updated retention events for user 3020
Updated retention events for user 3021
Updated retention events for user 3022
Updated retention events for user 3023
Updated retention events for user 3024
Updated retention events 

In [2]:
# CHURN EVENTS

def get_churn_events(user_id):
    """Load the time-ordered event types for one user (churn analysis stub).

    Looks up the user's ``amplitude_id`` in ``user_table``, reads every
    column of the matching ``TABLE_NAME`` rows in ascending ``event_time``
    order, and reduces them to a list of ``event_type`` values.

    NOTE(review): as visible in this cell the function stops after building
    that list -- there is no churn calculation and no ``return`` statement --
    so it appears to be unfinished.  Unlike ``get_retention_events`` there is
    no ``try``/``except`` here, so lookup and query errors propagate to the
    caller.

    Args:
        user_id: Value matched against ``user_table.user_id``.
    """
    user_response = supabase.table("user_table").select("amplitude_id").eq("user_id", user_id).execute()
    # data[0] assumes the user exists; an empty result raises IndexError here.
    amplitude_id = user_response.data[0]["amplitude_id"]
    # select("*") fetches every column although only event_type is used below.
    events_response = supabase.table(TABLE_NAME).select("*").eq("amplitude_id", amplitude_id).order("event_time", desc=False).execute()
    events = [event["event_type"] for event in events_response.data]
    
    
    


