In [4]:
import os
from flask import Flask, jsonify
from supabase import create_client, Client
from time import sleep
from datetime import datetime, timedelta

app = Flask(__name__)

SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")

supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

TABLE_NAME = "v2_federato_amplitude_data"

In [9]:
def get_retention_events(user_id):
    """Calculate the top 5 retention events for a given user."""
    try:
        # Get user's amplitude_id
        user_response = supabase.table("user_table").select("amplitude_id").eq("user_id", user_id).execute()
        amplitude_id = user_response.data[0]["amplitude_id"]
        
        # Get all events for this user, ordered by time
        events_response = supabase.table(TABLE_NAME)\
            .select("event_type, event_time")\
            .eq("amplitude_id", amplitude_id)\
            .order("event_time", desc=False)\
            .execute()
        
        if not events_response.data:
            return []
        
        # Convert events to a list of event types
        events = [event["event_type"] for event in events_response.data]
        
        # Calculate return frequency for each event type
        event_returns = {}
        for i, event in enumerate(events[:-1]):  # Exclude last event
            # Look at the next events to see if user returns to this event
            future_events = events[i+1:]
            if event in future_events:
                event_returns[event] = event_returns.get(event, 0) + 1
        
        # Calculate return rate (returns / total occurrences)
        event_counts = {}
        for event in events:
            event_counts[event] = event_counts.get(event, 0) + 1
            
        return_rates = {}
        for event, returns in event_returns.items():
            return_rates[event] = returns / event_counts[event]
        
        # Get top 5 events by return rate (minimum 2 occurrences)
        top_events = sorted(
            [(event, rate) for event, rate in return_rates.items() 
             if event_counts[event] >= 2],  # Filter events with at least 2 occurrences
            key=lambda x: x[1],
            reverse=True
        )[:5]
        
        # Format results
        return [{"event": event, "return_rate": rate} for event, rate in top_events]
        
    except Exception as e:
        print(f"Error calculating retention events for user {user_id}: {e}")
        return []

# Update all users in the user_table with their top retention events
def update_all_users_retention_events():
    try:
        # First, add the column directly with raw SQL
        add_column_sql = """
        DO $$ 
        BEGIN 
            IF NOT EXISTS (
                SELECT 1 
                FROM information_schema.columns 
                WHERE table_name = 'user_table' 
                AND column_name = 'top_retention_events'
            ) THEN 
                ALTER TABLE user_table ADD COLUMN top_retention_events JSONB;
            END IF;
        END $$;
        """
        
        # Execute the SQL directly through a query
        supabase.table("user_table").select("*").limit(1).execute()  # Ensure connection
        supabase.postgrest.schema("public").rpc("execute_sql", {"query": add_column_sql}).execute()
        print("Added top_retention_events column if it didn't exist")
        
        # Get all user IDs
        users_response = supabase.table("user_table").select("user_id").execute()
        
        # Process each user
        for user in users_response.data:
            user_id = user["user_id"]
            top_events = get_retention_events(user_id)
            
            # Update the user's record with their top retention events
            supabase.table("user_table")\
                .update({"top_retention_events": top_events})\
                .eq("user_id", user_id)\
                .execute()
            
            print(f"Updated retention events for user {user_id}")
            
        print("Completed updating all users' retention events")
        
    except Exception as e:
        print(f"Error updating retention events: {e}")

# Execute the update
update_all_users_retention_events()

Added top_retention_events column if it didn't exist
Updated retention events for user 1
Updated retention events for user 2
Updated retention events for user 3
Updated retention events for user 4
Updated retention events for user 5
Updated retention events for user 6
Updated retention events for user 7
Updated retention events for user 8
Updated retention events for user 9
Updated retention events for user 10
Updated retention events for user 11
Updated retention events for user 12
Updated retention events for user 13
Updated retention events for user 14
Updated retention events for user 15
Updated retention events for user 16
Updated retention events for user 17
Updated retention events for user 18
Updated retention events for user 19
Updated retention events for user 20
Updated retention events for user 21
Updated retention events for user 22
Updated retention events for user 23
Updated retention events for user 24
Updated retention events for user 25
Updated retention events for us

In [None]:
def get_retention_events(user_id):
    """Calculate the top 5 events where users spend the most time."""
    try:
        # Get user's amplitude_id
        user_response = supabase.table("user_table").select("amplitude_id").eq("user_id", user_id).execute()
        amplitude_id = user_response.data[0]["amplitude_id"]
        
        # Get all events for this user, ordered by time
        events_response = supabase.table(TABLE_NAME)\
            .select("event_type, event_time")\
            .eq("amplitude_id", amplitude_id)\
            .order("event_time", desc=False)\
            .execute()
        
        if not events_response.data:
            return []
        
        # Calculate time spent on each event type
        event_durations = {}
        event_counts = {}
        
        for i, current_event in enumerate(events_response.data[:-1]):
            next_event = events_response.data[i + 1]
            
            # Convert string timestamps to datetime objects
            current_time = datetime.fromisoformat(current_event["event_time"].replace('Z', '+00:00'))
            next_time = datetime.fromisoformat(next_event["event_time"].replace('Z', '+00:00'))
            
            duration = (next_time - current_time).total_seconds()
            
            event_type = current_event["event_type"]
            event_durations[event_type] = event_durations.get(event_type, 0) + duration
            event_counts[event_type] = event_counts.get(event_type, 0) + 1
        
        # Calculate average duration for each event type
        avg_durations = {}
        for event_type in event_durations:
            avg_durations[event_type] = event_durations[event_type] / event_counts[event_type]
        
        # Get top 5 events by average duration (minimum 2 occurrences)
        top_events = sorted(
            [(event, duration) for event, duration in avg_durations.items() 
             if event_counts[event] >= 2],  # Filter events with at least 2 occurrences
            key=lambda x: x[1],
            reverse=True
        )[:5]
        
        # Format results (duration in seconds)
        return [{"event": event, "avg_duration": duration} for event, duration in top_events]
        
    except Exception as e:
        print(f"Error calculating retention events for user {user_id}: {e}")
        return []

# Update all users in the user_table with their top retention events
def update_all_users_retention_events():
    try:
        page_size = 1000
        last_user_id = 0
        
        while True:
            try:
                # Get next batch of users
                users_response = supabase.table("user_table")\
                    .select("user_id")\
                    .gt("user_id", last_user_id)\
                    .order("user_id")\
                    .limit(page_size)\
                    .execute()
                
                # If no more users, break
                if not users_response.data:
                    break
                
                # Process each user in this batch
                for user in users_response.data:
                    user_id = user["user_id"]
                    top_events = get_retention_events(user_id)
                    
                    # Update the user's record with their top retention events
                    supabase.table("user_table")\
                        .update({"top_retention_events": top_events})\
                        .eq("user_id", user_id)\
                        .execute()
                    
                    print(f"Updated retention events for user {user_id}")
                    last_user_id = user_id
                
                print(f"Completed batch up to user {last_user_id}")
                sleep(1)

            except Exception as batch_error:
                print(f"Batch error: {batch_error}")
                sleep(5)
                continue

        print("Completed updating all users' retention events")
        
    except Exception as e:
        print(f"Error updating retention events: {e}")

# Execute the update
update_all_users_retention_events()

Updated retention events for user 1
Updated retention events for user 2
Updated retention events for user 3
Updated retention events for user 4
Updated retention events for user 5
Updated retention events for user 6
Updated retention events for user 7
Updated retention events for user 8
Updated retention events for user 9
Updated retention events for user 10
Updated retention events for user 11
Updated retention events for user 12
Updated retention events for user 13
Updated retention events for user 14
Updated retention events for user 15
Updated retention events for user 16
Updated retention events for user 17
Updated retention events for user 18
Updated retention events for user 19
Updated retention events for user 20
Updated retention events for user 21
Updated retention events for user 22
Updated retention events for user 23
Updated retention events for user 24
Updated retention events for user 25
Updated retention events for user 26
Updated retention events for user 27
Updated re

In [5]:
def update_user_session_metrics():
    """Add and update session metrics columns in the user_table."""
    try:
        # First, add the columns if they don't exist
        add_columns_sql = """
        DO $$ 
        BEGIN 
            -- Add average_session_time column
            IF NOT EXISTS (
                SELECT 1 FROM information_schema.columns 
                WHERE table_name = 'user_table' AND column_name = 'average_session_time'
            ) THEN 
                ALTER TABLE user_table ADD COLUMN average_session_time FLOAT;
            END IF;

            -- Add total_session_time column
            IF NOT EXISTS (
                SELECT 1 FROM information_schema.columns 
                WHERE table_name = 'user_table' AND column_name = 'total_session_time'
            ) THEN 
                ALTER TABLE user_table ADD COLUMN total_session_time FLOAT;
            END IF;

            -- Add frequency_of_sessions column
            IF NOT EXISTS (
                SELECT 1 FROM information_schema.columns 
                WHERE table_name = 'user_table' AND column_name = 'frequency_of_sessions'
            ) THEN 
                ALTER TABLE user_table ADD COLUMN frequency_of_sessions INTEGER;
            END IF;

            -- Add user_retention_30 column
            IF NOT EXISTS (
                SELECT 1 FROM information_schema.columns 
                WHERE table_name = 'user_table' AND column_name = 'user_retention_30'
            ) THEN 
                ALTER TABLE user_table ADD COLUMN user_retention_30 FLOAT;
            END IF;
        END $$;
        """
        
        # Execute the column addition
        supabase.postgrest.schema("public").rpc("execute_sql", {"query": add_columns_sql}).execute()
        print("Added new columns if they didn't exist")

        # Calculate the timestamp for 30 days ago
        thirty_days_ago = (datetime.now() - timedelta(days=30)).isoformat()

        # Process users in batches
        page_size = 1000
        last_user_id = 0
        
        while True:
            try:
                # Get next batch of users
                users_response = supabase.table("user_table")\
                    .select("user_id, amplitude_id")\
                    .gt("user_id", last_user_id)\
                    .order("user_id")\
                    .limit(page_size)\
                    .execute()
                
                if not users_response.data:
                    break
                
                # Process each user in this batch
                for user in users_response.data:
                    user_id = user["user_id"]
                    amplitude_id = user["amplitude_id"]
                    
                    # Get all events for this user in the last 30 days
                    events_response = supabase.table(TABLE_NAME)\
                        .select("event_time, session_id")\
                        .eq("amplitude_id", amplitude_id)\
                        .gte("event_time", thirty_days_ago)\
                        .order("event_time")\
                        .execute()
                    
                    if events_response.data:
                        # Group events by session
                        sessions = {}
                        for event in events_response.data:
                            session_id = event["session_id"]
                            event_time = datetime.fromisoformat(event["event_time"].replace('Z', '+00:00'))
                            
                            if session_id not in sessions:
                                sessions[session_id] = {"events": [], "duration": 0}
                            sessions[session_id]["events"].append(event_time)
                        
                        # Calculate session metrics
                        total_sessions = len(sessions)
                        total_session_time = 0
                        
                        for session in sessions.values():
                            if len(session["events"]) > 1:
                                session_duration = (max(session["events"]) - min(session["events"])).total_seconds()
                                total_session_time += session_duration
                                session["duration"] = session_duration
                        
                        # Calculate metrics
                        average_session_time = total_session_time / total_sessions if total_sessions > 0 else 0
                        frequency_of_sessions = total_sessions  # Number of sessions in past 30 days
                        
                        # Calculate retention (days active / 30 days)
                        unique_days = len(set(event_time.date() for session in sessions.values() for event_time in session["events"]))
                        user_retention_30 = unique_days / 30
                        
                        # Update user metrics
                        supabase.table("user_table")\
                            .update({
                                "average_session_time": average_session_time,
                                "total_session_time": total_session_time,
                                "frequency_of_sessions": frequency_of_sessions,
                                "user_retention_30": user_retention_30
                            })\
                            .eq("user_id", user_id)\
                            .execute()
                    
                    else:
                        # No events in last 30 days
                        supabase.table("user_table")\
                            .update({
                                "average_session_time": 0,
                                "total_session_time": 0,
                                "frequency_of_sessions": 0,
                                "user_retention_30": 0
                            })\
                            .eq("user_id", user_id)\
                            .execute()
                    
                    print(f"Updated session metrics for user {user_id}")
                    last_user_id = user_id
                
                print(f"Completed batch up to user {last_user_id}")
                sleep(1)  # Rate limiting

            except Exception as batch_error:
                print(f"Batch error: {batch_error}")
                sleep(5)  # Longer sleep on error
                continue

        print("Completed updating all users' session metrics")
        
    except Exception as e:
        print(f"Error updating session metrics: {e}")

# Execute the update
update_user_session_metrics()

Added new columns if they didn't exist
Updated session metrics for user 1
Updated session metrics for user 2
Updated session metrics for user 3
Updated session metrics for user 4
Updated session metrics for user 5
Updated session metrics for user 6
Updated session metrics for user 7
Updated session metrics for user 8
Updated session metrics for user 9
Updated session metrics for user 10
Updated session metrics for user 11
Updated session metrics for user 12
Updated session metrics for user 13
Updated session metrics for user 14
Updated session metrics for user 15
Updated session metrics for user 16
Batch error: {'code': '57014', 'details': None, 'hint': None, 'message': 'canceling statement due to statement timeout'}
Updated session metrics for user 17
Updated session metrics for user 18
Updated session metrics for user 19
Updated session metrics for user 20
Updated session metrics for user 21
Updated session metrics for user 22
Updated session metrics for user 23
Updated session metri

In [6]:
def update_user_active_periods():
    """Add and update top 5 daily active time periods for each user."""
    try:
        # First, add the column if it doesn't exist
        add_column_sql = """
        DO $$ 
        BEGIN 
            IF NOT EXISTS (
                SELECT 1 FROM information_schema.columns 
                WHERE table_name = 'user_table' AND column_name = 'daily_active_periods'
            ) THEN 
                ALTER TABLE user_table ADD COLUMN daily_active_periods JSONB;
            END IF;
        END $$;
        """
        
        # Execute the column addition
        supabase.postgrest.schema("public").rpc("execute_sql", {"query": add_column_sql}).execute()
        print("Added daily_active_periods column if it didn't exist")

        # Process users in batches
        page_size = 1000
        last_user_id = 0
        
        while True:
            try:
                # Get next batch of users
                users_response = supabase.table("user_table")\
                    .select("user_id, amplitude_id")\
                    .gt("user_id", last_user_id)\
                    .order("user_id")\
                    .limit(page_size)\
                    .execute()
                
                if not users_response.data:
                    break
                
                # Process each user in this batch
                for user in users_response.data:
                    user_id = user["user_id"]
                    amplitude_id = user["amplitude_id"]
                    
                    # Get all events for this user
                    events_response = supabase.table(TABLE_NAME)\
                        .select("event_time")\
                        .eq("amplitude_id", amplitude_id)\
                        .execute()
                    
                    if events_response.data:
                        # Initialize hour counters
                        hour_counts = {str(i).zfill(2): 0 for i in range(24)}
                        
                        # Count events by hour
                        for event in events_response.data:
                            event_time = datetime.fromisoformat(event["event_time"].replace('Z', '+00:00'))
                            hour = str(event_time.hour).zfill(2)
                            hour_counts[hour] += 1
                        
                        # Get only hours with events
                        active_hours = [(hour, count) for hour, count in hour_counts.items() if count > 0]
                        
                        # Sort by count and get top 5
                        top_5_periods = sorted(active_hours, key=lambda x: x[1], reverse=True)[:5]
                        
                        # Format the results
                        active_periods = [
                            {
                                "hour": hour,
                                "count": count,
                                "period": f"{hour}:00-{str((int(hour) + 1) % 24).zfill(2)}:00"
                            }
                            for hour, count in top_5_periods
                        ]
                        
                        # Update user metrics
                        supabase.table("user_table")\
                            .update({"daily_active_periods": active_periods})\
                            .eq("user_id", user_id)\
                            .execute()
                    
                    else:
                        # No events
                        supabase.table("user_table")\
                            .update({"daily_active_periods": []})\
                            .eq("user_id", user_id)\
                            .execute()
                    
                    print(f"Updated active periods for user {user_id}")
                    last_user_id = user_id
                
                print(f"Completed batch up to user {last_user_id}")
                sleep(1)  # Rate limiting

            except Exception as batch_error:
                print(f"Batch error: {batch_error}")
                sleep(5)  # Longer sleep on error
                continue

        print("Completed updating all users' active periods")
        
    except Exception as e:
        print(f"Error updating active periods: {e}")

# Execute the update
update_user_active_periods()


Added daily_active_periods column if it didn't exist
Batch error: {'code': 'PGRST204', 'details': None, 'hint': None, 'message': "Could not find the 'daily_active_periods' column of 'user_table' in the schema cache"}
Updated active periods for user 1
Updated active periods for user 2
Updated active periods for user 3
Updated active periods for user 4
Updated active periods for user 5
Updated active periods for user 6
Updated active periods for user 7
Updated active periods for user 8
Updated active periods for user 9
Updated active periods for user 10
Updated active periods for user 11
Updated active periods for user 12
Updated active periods for user 13
Updated active periods for user 14
Updated active periods for user 15
Updated active periods for user 16
Updated active periods for user 17
Updated active periods for user 18
Updated active periods for user 19
Updated active periods for user 20
Updated active periods for user 21
Updated active periods for user 22
Updated active periods

In [8]:
# Query to get session times for all users
query_response = supabase.table("user_table_refined").select("average_session_time").execute()

# Filter out None values and collect all session times
session_times = [
    user["average_session_time"] 
    for user in query_response.data 
    if user["average_session_time"] is not None
]

if session_times:
    # Calculate statistics
    total_users = len(session_times)
    overall_average = sum(session_times) / total_users
    max_duration = max(session_times)
    
    # Convert all times to minutes
    average_minutes = overall_average / 60
    max_minutes = max_duration / 60
    
    # Calculate median
    sorted_times = sorted(session_times)
    median = sorted_times[len(sorted_times) // 2]
    median_minutes = median / 60
    
    # Calculate 25th and 75th percentiles
    p25 = sorted_times[len(sorted_times) // 4] / 60
    p75 = sorted_times[len(sorted_times) * 3 // 4] / 60

    print("\nSession Time Statistics:")
    print("--------------------------------")
    print(f"Number of Users: {total_users:,}")  # Add comma separator for large numbers
    print(f"Average Session Duration: {average_minutes:.2f} minutes")
    print(f"Maximum Session Duration: {max_minutes:.2f} minutes")
    print(f"Median Session Duration: {median_minutes:.2f} minutes")
    print(f"25th Percentile: {p25:.2f} minutes")
    print(f"75th Percentile: {p75:.2f} minutes")
    print("--------------------------------")
else:
    print("No session time data available")


Session Time Statistics:
--------------------------------
Number of Users: 1,675
Average Session Duration: 2.78 minutes
Maximum Session Duration: 104.15 minutes
Median Session Duration: 0.00 minutes
25th Percentile: 0.00 minutes
75th Percentile: 0.04 minutes
--------------------------------


In [10]:
# Query to get total session times for all users
query_response = supabase.table("user_table_refined").select("total_session_time").execute()

# Filter out None values and collect all total session times
session_times = [
    user["total_session_time"] 
    for user in query_response.data 
    if user["total_session_time"] is not None
]

if session_times:
    # Calculate statistics
    total_users = len(session_times)
    overall_average = sum(session_times) / total_users
    max_duration = max(session_times)
    
    # Convert all times to hours for better readability
    average_hours = overall_average / 3600  # Convert seconds to hours
    max_hours = max_duration / 3600
    
    # Calculate median
    sorted_times = sorted(session_times)
    median = sorted_times[len(sorted_times) // 2]
    median_hours = median / 3600
    
    # Calculate 25th and 75th percentiles
    p25 = sorted_times[len(sorted_times) // 4] / 3600
    p75 = sorted_times[len(sorted_times) * 3 // 4] / 3600

    print("\nTotal Session Time Statistics:")
    print("--------------------------------")
    print(f"Number of Users: {total_users:,}")  # Add comma separator for large numbers
    print(f"Average Total Time: {average_hours:.2f} hours")
    print(f"Maximum Total Time: {max_hours:.2f} hours")
    print(f"Median Total Time: {median_hours:.2f} hours")
    print(f"25th Percentile: {p25:.2f} hours")
    print(f"75th Percentile: {p75:.2f} hours")
    print("--------------------------------")
else:
    print("No total session time data available")



Total Session Time Statistics:
--------------------------------
Number of Users: 1,675
Average Total Time: 0.73 hours
Maximum Total Time: 26.75 hours
Median Total Time: 0.00 hours
25th Percentile: 0.00 hours
75th Percentile: 0.00 hours
--------------------------------


In [11]:
# Query to get session frequencies for all users
query_response = supabase.table("user_table_refined").select("frequency_of_sessions").execute()

# Filter out None values and collect all frequencies
session_frequencies = [
    user["frequency_of_sessions"] 
    for user in query_response.data 
    if user["frequency_of_sessions"] is not None
]

if session_frequencies:
    # Calculate statistics
    total_users = len(session_frequencies)
    overall_average = sum(session_frequencies) / total_users
    max_frequency = max(session_frequencies)
    
    # Calculate median
    sorted_frequencies = sorted(session_frequencies)
    median = sorted_frequencies[len(sorted_frequencies) // 2]
    
    # Calculate 25th and 75th percentiles
    p25 = sorted_frequencies[len(sorted_frequencies) // 4]
    p75 = sorted_frequencies[len(sorted_frequencies) * 3 // 4]

    print("\nSession Frequency Statistics (30-day period):")
    print("--------------------------------")
    print(f"Number of Users: {total_users:,}")  # Add comma separator for large numbers
    print(f"Average Sessions per User: {overall_average:.1f}")
    print(f"Maximum Sessions: {max_frequency}")
    print(f"Median Sessions: {median}")
    print(f"25th Percentile: {p25}")
    print(f"75th Percentile: {p75}")
    print("--------------------------------")
    
    # Calculate distribution buckets
    print("\nSession Frequency Distribution:")
    print("--------------------------------")
    ranges = [(0, 1), (2, 5), (6, 10), (11, 20), (21, 30), (31, float('inf'))]
    for start, end in ranges:
        if end == float('inf'):
            count = sum(1 for f in session_frequencies if f >= start)
            percentage = (count / total_users) * 100
            print(f"{start}+ sessions: {count:,} users ({percentage:.1f}%)")
        else:
            count = sum(1 for f in session_frequencies if start <= f <= end)
            percentage = (count / total_users) * 100
            print(f"{start}-{end} sessions: {count:,} users ({percentage:.1f}%)")
    print("--------------------------------")
else:
    print("No session frequency data available")



Session Frequency Statistics (30-day period):
--------------------------------
Number of Users: 1,675
Average Sessions per User: 4.2
Maximum Sessions: 104
Median Sessions: 0
25th Percentile: 0
75th Percentile: 1
--------------------------------

Session Frequency Distribution:
--------------------------------
0-1 sessions: 1,296 users (77.4%)
2-5 sessions: 99 users (5.9%)
6-10 sessions: 52 users (3.1%)
11-20 sessions: 91 users (5.4%)
21-30 sessions: 61 users (3.6%)
31+ sessions: 76 users (4.5%)
--------------------------------


In [12]:
# Query to get 30-day retention rates for all users
query_response = supabase.table("user_table_refined").select("user_retention_30").execute()

# Filter out None values and collect all retention rates
retention_rates = [
    user["user_retention_30"] 
    for user in query_response.data 
    if user["user_retention_30"] is not None
]

if retention_rates:
    # Calculate statistics
    total_users = len(retention_rates)
    overall_average = sum(retention_rates) / total_users
    max_retention = max(retention_rates)
    
    # Calculate median
    sorted_rates = sorted(retention_rates)
    median = sorted_rates[len(sorted_rates) // 2]
    
    # Calculate 25th and 75th percentiles
    p25 = sorted_rates[len(sorted_rates) // 4]
    p75 = sorted_rates[len(sorted_rates) * 3 // 4]

    print("\n30-Day Retention Statistics:")
    print("--------------------------------")
    print(f"Number of Users: {total_users:,}")
    print(f"Average Retention Rate: {overall_average:.1%}")
    print(f"Maximum Retention Rate: {max_retention:.1%}")
    print(f"Median Retention Rate: {median:.1%}")
    print(f"25th Percentile: {p25:.1%}")
    print(f"75th Percentile: {p75:.1%}")
    print("--------------------------------")
    
    # Calculate distribution buckets
    print("\nRetention Rate Distribution:")
    print("--------------------------------")
    ranges = [(0, 0.1), (0.1, 0.3), (0.3, 0.5), (0.5, 0.7), (0.7, 0.9), (0.9, 1.0)]
    for start, end in ranges:
        count = sum(1 for r in retention_rates if start <= r <= end)
        percentage = (count / total_users) * 100
        print(f"{start*100:.0f}%-{end*100:.0f}%: {count:,} users ({percentage:.1f}%)")
    print("--------------------------------")
else:
    print("No retention rate data available")



30-Day Retention Statistics:
--------------------------------
Number of Users: 1,675
Average Retention Rate: 1.9%
Maximum Retention Rate: 16.7%
Median Retention Rate: 0.0%
25th Percentile: 0.0%
75th Percentile: 3.3%
--------------------------------

Retention Rate Distribution:
--------------------------------
0%-10%: 1,644 users (98.1%)
10%-30%: 177 users (10.6%)
30%-50%: 0 users (0.0%)
50%-70%: 0 users (0.0%)
70%-90%: 0 users (0.0%)
90%-100%: 0 users (0.0%)
--------------------------------


In [None]:
# Query to get daily active periods for all users
query_response = supabase.table("user_table_refined").select("daily_active_periods").execute()

# Flatten all periods and count occurrences
period_counts = {}
for user in query_response.data:
    if user["daily_active_periods"]:  # Check if not None or empty
        # Count each unique period only once per user
        user_periods = set(period["hour"] for period in user["daily_active_periods"])
        for period in user_periods:
            period_counts[period] = period_counts.get(period, 0) + 1

# Get top 5 most common periods
top_5_periods = sorted(
    period_counts.items(),
    key=lambda x: x[1],  # Sort by count
    reverse=True
)[:5]

# Calculate percentage of users for each period
total_users = len([u for u in query_response.data if u["daily_active_periods"]])

print("\nTop 5 Most Active Time Periods:")
print("--------------------------------")
for hour, count in top_5_periods:
    percentage = (count / total_users) * 100 if total_users > 0 else 0
    # Format the time period
    start_hour = int(hour)
    end_hour = (start_hour + 1) % 24
    period = f"{str(start_hour).zfill(2)}:00-{str(end_hour).zfill(2)}:00"
    
    print(f"Time Period: {period}")
    print(f"Number of Users: {count:,}")
    print(f"Percentage of Users: {percentage:.1f}%")
    print("--------------------------------")


In [14]:
# Query to get daily active periods for all users
query_response = supabase.table("user_table_refined").select("daily_active_periods").execute()

# Flatten all periods and count occurrences
period_counts = {}
for user in query_response.data:
    if user["daily_active_periods"]:  # Check if not None or empty
        # Count each unique period only once per user
        user_periods = set(period["hour"] for period in user["daily_active_periods"])
        for period in user_periods:
            period_counts[period] = period_counts.get(period, 0) + 1

# Get top 10 most common periods
top_10_periods = sorted(
    period_counts.items(),
    key=lambda x: x[1],  # Sort by count
    reverse=True
)[:10]

# Calculate percentage of users for each period
total_users = len([u for u in query_response.data if u["daily_active_periods"]])

print("\nTop 10 Most Active Time Periods:")
print("--------------------------------")
for hour, count in top_10_periods:
    percentage = (count / total_users) * 100 if total_users > 0 else 0
    # Format the time period
    start_hour = int(hour)
    end_hour = (start_hour + 1) % 24
    period = f"{str(start_hour).zfill(2)}:00-{str(end_hour).zfill(2)}:00"
    
    print(f"Time Period: {period}")
    print(f"Number of Users: {count:,}")
    print(f"Percentage of Users: {percentage:.1f}%")
    print("--------------------------------")



Top 10 Most Active Time Periods:
--------------------------------
Time Period: 15:00-16:00
Number of Users: 757
Percentage of Users: 45.2%
--------------------------------
Time Period: 19:00-20:00
Number of Users: 717
Percentage of Users: 42.8%
--------------------------------
Time Period: 14:00-15:00
Number of Users: 709
Percentage of Users: 42.3%
--------------------------------
Time Period: 16:00-17:00
Number of Users: 693
Percentage of Users: 41.4%
--------------------------------
Time Period: 18:00-19:00
Number of Users: 671
Percentage of Users: 40.1%
--------------------------------
Time Period: 17:00-18:00
Number of Users: 653
Percentage of Users: 39.0%
--------------------------------
Time Period: 20:00-21:00
Number of Users: 597
Percentage of Users: 35.6%
--------------------------------
Time Period: 13:00-14:00
Number of Users: 486
Percentage of Users: 29.0%
--------------------------------
Time Period: 21:00-22:00
Number of Users: 449
Percentage of Users: 26.8%
------------