In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [44]:
%%bigquery df

-- Stage-to-stage transition counts for the launcher funnel.
-- Output columns: source_stage, target_stage, transition_count.
WITH staged_events AS (
  -- One row per (user, stage): the earliest timestamp at which the user
  -- fired any event that maps to that stage.
  SELECT
    user_pseudo_id,
    CASE
      WHEN event_name = 'first_visit' THEN 'visit'
      -- The prefix match covers 'registration' itself and the registration_* steps.
      WHEN event_name LIKE 'registration%'
        THEN 'registration'
      WHEN event_name = 'oauth_redirects_signup'
        THEN 'oauth_signup'
      -- NOTE: '_' is a single-character wildcard in LIKE; only the 2fa_* events
      -- listed in the WHERE clause below can reach this branch.
      WHEN event_name LIKE '2fa_%'
        THEN '2fa'
      WHEN event_name IN ('download_bigtime','download')
        THEN 'download_game'
      WHEN event_name = 'download_launcher'
        THEN 'download_launcher'
      WHEN event_name = 'launch_ol_launcher'
        THEN 'launch_launcher'
      ELSE NULL
    END AS stage,
    TIMESTAMP_MICROS(MIN(event_timestamp)) AS first_ts
  FROM `bigtimestudios.analytics_301962723.events_*`
  WHERE event_name IN (
    'first_visit','download_bigtime',
    'registration_country','registration_age',
    'registration_username','registration',
    'download_launcher','download',
    '2fa_phone_verification_submit','2fa_email_setup_submit',
    '2fa_email_verification_submit','oauth_redirects_signup',
    'launch_ol_launcher','2fa_app_verification_submit'
  )
  GROUP BY user_pseudo_id, stage
),

ordered_stages AS (
  -- Number each user's stages in the order they were first reached.
  SELECT
    user_pseudo_id,
    stage,
    first_ts,
    ROW_NUMBER() OVER (
      PARTITION BY user_pseudo_id
      -- `stage` breaks ties so that stages sharing the same first timestamp
      -- are ordered identically on every run (otherwise the order is arbitrary).
      ORDER BY first_ts, stage
    ) AS step_num
  FROM staged_events
  WHERE stage IS NOT NULL
),

stage_pairs AS (
  -- Pair every stage with the next stage the same user reached (NULL for the last one).
  SELECT
    user_pseudo_id,
    stage AS source_stage,
    LEAD(stage) OVER (
      PARTITION BY user_pseudo_id
      ORDER BY step_num
    ) AS target_stage
  FROM ordered_stages
)

SELECT
  source_stage,
  target_stage,
  COUNT(*) AS transition_count
FROM stage_pairs
WHERE target_stage IS NOT NULL            -- a user's last stage has no successor
  -- Defensive guard only: each stage occurs once per user after the GROUP BY
  -- above, so consecutive stages are always different.
  AND source_stage <> target_stage
GROUP BY 1, 2;


Query is running:   0%|          |

Downloading:   0%|          |

### Heatmap

### Sankey Flow

In [49]:
# Prepare the Sankey inputs (node labels/positions and link lists) from the
# transition counts held in `df`.
# `df` itself is left untouched: the filtered copy lives in `sankey_df`, so this
# cell can be re-run with different settings without re-running the query cell.
MIN_TRANSITIONS = 10   # links with fewer transitions than this are dropped
FORWARD_ONLY = False   # True keeps only links that point to a later stage in stage_order

# Funnel stages in display order; the Sankey gets one node per stage
stage_order = [
    "visit",
    "registration",
    "oauth_signup",
    "2fa",
    "download_game",
    "download_launcher",
    "launch_launcher",
]

# Node index of each stage = its position in stage_order
labels = stage_order
label_to_idx = {label: i for i, label in enumerate(labels)}

# Keep links that are large enough and whose endpoints are both known stages
keep = (
    (df["transition_count"] >= MIN_TRANSITIONS)
    & df["source_stage"].isin(stage_order)
    & df["target_stage"].isin(stage_order)
)
sankey_df = df.loc[keep].copy()
sankey_df["source_idx"] = sankey_df["source_stage"].map(label_to_idx)
sankey_df["target_idx"] = sankey_df["target_stage"].map(label_to_idx)

if FORWARD_ONLY:
    sankey_df = sankey_df[sankey_df["target_idx"] > sankey_df["source_idx"]]

source = sankey_df["source_idx"].tolist()
target = sankey_df["target_idx"].tolist()
value = sankey_df["transition_count"].tolist()

# Strictly linear layout: nodes equally spaced left to right, all on one row.
# Positions are kept strictly inside (0, 1) because plotly's Sankey is reported
# to mis-place nodes whose coordinate is exactly 0 or 1.
n = len(stage_order)
x = np.linspace(0.001, 0.999, n).tolist()
y = [0.5] * n

# Describe nodes and links separately, then combine them into one Sankey trace.
node_spec = dict(label=labels, x=x, y=y, pad=15, thickness=20)
link_spec = dict(source=source, target=target, value=value)

# arrangement="fixed" goes together with the explicit node x/y positions
sankey_trace = go.Sankey(arrangement="fixed", node=node_spec, link=link_spec)
fig = go.Figure(sankey_trace)

# Wide, short canvas to suit the left-to-right funnel
fig.update_layout(
    title={"text": "Linear Launcher Funnel (Stage-level)"},
    font={"size": 11},
    width=1100,
    height=400,
)

fig.show()