# Bluegrass Songbook Analytics Dashboard

Analytics for visitor engagement, user activity, and content curation.

In [None]:
# Setup
import os
from datetime import datetime, timedelta
from dotenv import load_dotenv
from supabase import create_client, ClientOptions
import pandas as pd
import matplotlib.pyplot as plt

# Read the Supabase credentials from the environment (.env is loaded first)
load_dotenv()

SUPABASE_URL = os.environ.get('SUPABASE_URL')
SUPABASE_KEY = os.environ.get('SUPABASE_SERVICE_KEY')

# Fail fast: both values must be present and non-empty to build a client
if not (SUPABASE_URL and SUPABASE_KEY):
    raise ValueError('Missing SUPABASE_URL or SUPABASE_SERVICE_KEY in .env')

# Server-side pattern: token auto-refresh and session persistence are both
# switched off, so no session state is kept around for the service key.
_client_options = ClientOptions(auto_refresh_token=False, persist_session=False)
supabase = create_client(SUPABASE_URL, SUPABASE_KEY, options=_client_options)
print('Connected to Supabase (service role, no session)')

## Visitor Statistics

In [None]:
# Daily visitor stats: the 30 most recent rows of visitor_stats
result = (
    supabase.table('visitor_stats')
    .select('*')
    .order('date', desc=True)
    .limit(30)
    .execute()
)
df_visits = pd.DataFrame(result.data)

if df_visits.empty:
    print('No visitor data yet')
else:
    # Parse the dates and flip to chronological order (the query returns newest first)
    df_visits = df_visits.assign(date=pd.to_datetime(df_visits['date'])).sort_values('date')

    # NOTE(review): these are sums over the fetched rows (at most 30); if
    # unique_visitors is a per-day count, the first figure double-counts anyone
    # who visited on more than one day -- confirm the label is what is intended.
    unique_total = df_visits['unique_visitors'].sum()
    views_total = df_visits['page_views'].sum()
    print(f"Total unique visitors: {unique_total:,}")
    print(f"Total page views: {views_total:,}")
    print("\nLast 7 days:")
    display(df_visits.tail(7))

In [None]:
# Daily traffic chart: page views as bars with unique visitors drawn on top
if not df_visits.empty:
    dates = df_visits['date']
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.bar(dates, df_visits['page_views'], alpha=0.7, label='Page Views')
    # 'ro-' = red circle markers joined by a solid line
    ax.plot(dates, df_visits['unique_visitors'], 'ro-', label='Unique Visitors')
    ax.set(xlabel='Date', ylabel='Count', title='Daily Traffic (Last 30 Days)')
    ax.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

## User Activity

In [None]:
# Get all users with their activity counts
# Note: auth.users is not directly accessible, but we can count activity from our tables


def _fetch_all(table, columns):
    """Return every row of ``table`` (restricted to ``columns``) as a list of dicts.

    A single ``select().execute()`` returns at most the API's configured row
    cap (1,000 on a default Supabase project), which would silently truncate
    the totals printed below. This pages through the table with ``range()``
    until an empty page comes back, advancing by however many rows have been
    received so far, so it works whatever the cap is set to.

    NOTE(review): no ordering is applied, so paging assumes the rows come back
    in a consistent order between requests -- add ``.order(<key column>)``
    once the table's key column is confirmed.
    """
    rows = []
    window = 1000  # rows requested per page; the server may return fewer
    while True:
        start = len(rows)
        page = (
            supabase.table(table)
            .select(columns)
            .range(start, start + window - 1)
            .execute()
            .data
        )
        if not page:
            return rows
        rows.extend(page)


# Song votes by user
df_votes = pd.DataFrame(_fetch_all('song_votes', 'user_id, song_id'))

# Genre suggestions by user
df_suggestions = pd.DataFrame(_fetch_all('genre_suggestions', 'user_id, song_id, raw_tag'))

# Tag votes by user
df_tag_votes = pd.DataFrame(_fetch_all('tag_votes', 'user_id, song_id, tag_name, vote_value'))

print(f"Total song votes: {len(df_votes)}")
print(f"Total genre suggestions: {len(df_suggestions)}")
print(f"Total tag votes: {len(df_tag_votes)}")

In [None]:
# Top users by total activity
# Tally one action per row across the three activity tables
activity = {}
for frame in (df_votes, df_suggestions, df_tag_votes):
    if frame.empty:
        continue  # a frame built from zero rows has no user_id column to read
    for uid in frame['user_id']:
        activity[uid] = activity.get(uid, 0) + 1

if not activity:
    print('No user activity yet')
else:
    # One row per user, busiest first
    df_activity = pd.DataFrame(
        list(activity.items()), columns=['user_id', 'total_actions']
    )
    df_activity = df_activity.sort_values('total_actions', ascending=False)

    print(f"Total active users: {len(df_activity)}")
    print("\nTop 10 most active users:")
    display(df_activity.head(10))

## Song Engagement

In [None]:
# Most voted songs: rows in song_votes counted per song_id, ten largest shown
if df_votes.empty:
    print('No song votes yet')
else:
    votes_per_song = df_votes.groupby('song_id').size()
    top_voted = votes_per_song.sort_values(ascending=False).head(10)
    print("Top 10 most voted songs:")
    for sid, n_votes in top_voted.items():
        print(f"  {sid}: {n_votes} votes")

In [None]:
# Most tagged songs: genre suggestions counted per song_id, ten largest shown
if df_suggestions.empty:
    print('No genre suggestions yet')
else:
    suggestions_per_song = df_suggestions.groupby('song_id').size()
    top_tagged = suggestions_per_song.sort_values(ascending=False).head(10)
    print("Top 10 most tagged songs:")
    for sid, n_tags in top_tagged.items():
        print(f"  {sid}: {n_tags} tag suggestions")

## Tag Analytics

In [None]:
# Most suggested tags: frequency of each raw_tag value, twenty most common shown
if df_suggestions.empty:
    print('No tag suggestions yet')
else:
    raw_tags = df_suggestions['raw_tag']
    tag_counts = raw_tags.value_counts().head(20)
    print("Top 20 suggested tags:")
    for tag_label, n_suggested in tag_counts.items():
        print(f"  {tag_label}: {n_suggested}")

In [None]:
# Tag vote summary: per-tag up/down counts and net score
if df_tag_votes.empty:
    print('No tag votes yet')
else:
    votes_by_tag = df_tag_votes.groupby('tag_name')
    tag_vote_summary = votes_by_tag.agg(
        # rows whose vote_value is exactly +1 / -1
        upvotes=('vote_value', lambda v: (v == 1).sum()),
        downvotes=('vote_value', lambda v: (v == -1).sum()),
        # plain sum of vote_value for the tag
        net_score=('vote_value', 'sum'),
    )
    tag_vote_summary = tag_vote_summary.sort_values('net_score', ascending=False)

    print("Tag vote summary (top 15):")
    display(tag_vote_summary.head(15))

## Visitor Retention

In [None]:
# Visitor first/last seen analysis
#
# The visitors table is read page by page: a single select().execute() returns
# at most the API's configured row cap (1,000 on a default Supabase project),
# which would silently cap "Total unique visitors" and skew the percentages.
# NOTE(review): no ordering is applied, so paging assumes a consistent row
# order between requests -- add .order(<key column>) once the key is confirmed.
visitor_rows = []
while True:
    offset = len(visitor_rows)
    page = (
        supabase.table('visitors')
        .select('visitor_id, first_seen, last_seen')
        .range(offset, offset + 999)
        .execute()
        .data
    )
    if not page:
        break
    visitor_rows.extend(page)
df_visitors = pd.DataFrame(visitor_rows)

if not df_visitors.empty:
    df_visitors['first_seen'] = pd.to_datetime(df_visitors['first_seen'])
    df_visitors['last_seen'] = pd.to_datetime(df_visitors['last_seen'])

    # Returning visitors: last seen on a different calendar date than first seen
    # (a second visit on the same day does not count as returning)
    df_visitors['is_returning'] = df_visitors['last_seen'].dt.date != df_visitors['first_seen'].dt.date
    returning_count = df_visitors['is_returning'].sum()
    total_visitors = len(df_visitors)

    print(f"Total unique visitors: {total_visitors:,}")
    print(f"Returning visitors: {returning_count:,} ({100*returning_count/total_visitors:.1f}%)")
    print(f"One-time visitors: {total_visitors - returning_count:,}")

    # New visitors by day: the 14 most recent dates that have at least one new
    # visitor (dates with none do not appear in the grouping)
    new_by_day = df_visitors.groupby(df_visitors['first_seen'].dt.date).size().tail(14)
    print(f"\nNew visitors per day (last 14 days):")
    for date, count in new_by_day.items():
        print(f"  {date}: {count}")
else:
    print('No visitor data yet')

## User Lists

In [None]:
# User-created lists


def _list_rows(table, columns):
    """Return every row of ``table`` (restricted to ``columns``) as a list of dicts.

    Pages with ``range()`` until an empty page comes back, because a single
    ``select().execute()`` returns at most the API's configured row cap
    (1,000 on a default Supabase project) and would silently truncate counts.

    NOTE(review): no ordering is applied, so paging assumes a consistent row
    order between requests -- add ``.order(<key column>)`` once confirmed.
    """
    rows = []
    while True:
        offset = len(rows)
        page = (
            supabase.table(table)
            .select(columns)
            .range(offset, offset + 999)
            .execute()
            .data
        )
        if not page:
            return rows
        rows.extend(page)


# Best-effort cell: any failure while fetching or summarising (for example a
# missing table) is reported as a message rather than stopping the notebook.
try:
    df_lists = pd.DataFrame(_list_rows('user_lists', 'id, name, user_id, created_at'))

    if not df_lists.empty:
        print(f"Total user lists: {len(df_lists)}")
        print(f"Users with lists: {df_lists['user_id'].nunique()}")

        # Songs per list
        df_list_songs = pd.DataFrame(_list_rows('list_songs', 'list_id, song_id'))

        if not df_list_songs.empty:
            # Reindex over every known list id so lists with no songs count as
            # 0 in the average instead of being left out of it.
            songs_per_list = (
                df_list_songs.groupby('list_id').size()
                .reindex(df_lists['id'], fill_value=0)
            )
            print(f"Total songs in lists: {len(df_list_songs)}")
            print(f"Average songs per list: {songs_per_list.mean():.1f}")
        else:
            print('No songs in lists yet')
    else:
        print('No user lists yet')
except Exception as e:
    print(f'Could not fetch user lists: {e}')