Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@
"test:cloudflare:api": "vitest run --exclude=tests/cli* --exclude=tests/updates* --exclude=tests/stats* --exclude=tests/channel_self* --config vitest.config.cloudflare.ts",
"test:cloudflare:updates": "vitest run tests/updates* --config vitest.config.cloudflare.ts",
"stripe:backfill-retention-metrics": "bun scripts/backfill_retention_metrics.ts",
"stripe:backfill-org-conversion-rate": "bun scripts/backfill_org_conversion_rate_trend.ts",
"stripe:backfill-customer-countries": "bun scripts/backfill_stripe_customer_countries.ts",
"stripe:sync-org-names": "bun scripts/sync_stripe_org_names.ts",
"lint": "eslint \"src/**/*.{vue,ts,js}\"",
"fmt": "bun run lint:fix && bun run lint:sql",
Expand Down
114 changes: 114 additions & 0 deletions scripts/admin_stripe_backfill_utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import type { Database } from '../supabase/functions/_backend/utils/supabase.types.ts'
import { existsSync } from 'node:fs'
import { readFile } from 'node:fs/promises'
import { createClient } from '@supabase/supabase-js'
import Stripe from 'stripe'

export const DEFAULT_ENV_FILE = './internal/cloudflare/.env.prod'

export function getArgValue(args: string[], prefix: string): string | null {
const arg = args.find(value => value.startsWith(`${prefix}=`))
if (!arg)
return null
return arg.slice(prefix.length + 1)
}

export async function loadEnv(filePath: string) {
if (!existsSync(filePath))
return {}

const text = await readFile(filePath, 'utf8')
const env: Record<string, string> = {}

for (const line of text.split('\n')) {
const trimmed = line.trim()
if (!trimmed || trimmed.startsWith('#'))
continue

const separatorIndex = trimmed.indexOf('=')
if (separatorIndex <= 0)
continue

const key = trimmed.slice(0, separatorIndex)
let value = trimmed.slice(separatorIndex + 1)
if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith('\'') && value.endsWith('\'')))
value = value.slice(1, -1)
env[key] = value
}

return env
}

export function getRequiredEnv(env: Record<string, string | undefined>, key: string) {
const value = env[key]?.trim()
if (!value)
throw new Error(`Missing ${key}`)
return value
}

export function getSupabaseServiceRoleKey(env: Record<string, string | undefined>) {
const value = env.SUPABASE_SERVICE_ROLE_KEY?.trim() || env.SUPABASE_SERVICE_KEY?.trim()
if (!value)
throw new Error('Missing SUPABASE_SERVICE_ROLE_KEY')
return value
}

export function createSupabaseServiceClient(env: Record<string, string | undefined>) {
return createClient<Database>(
getRequiredEnv(env, 'SUPABASE_URL'),
getSupabaseServiceRoleKey(env),
{ auth: { autoRefreshToken: false, persistSession: false, detectSessionInUrl: false } },
)
}

export function createStripeClient(secretKey: string, apiBaseUrl?: string) {
let hostConfig: Partial<Pick<NonNullable<ConstructorParameters<typeof Stripe>[1]>, 'host' | 'port' | 'protocol'>> = {}

if (apiBaseUrl?.trim()) {
const parsed = new URL(apiBaseUrl)
hostConfig = {
host: parsed.hostname,
port: Number.parseInt(parsed.port || (parsed.protocol === 'https:' ? '443' : '80'), 10),
protocol: parsed.protocol.replace(':', '') as 'http' | 'https',
}
}

type StripeApiVersion = NonNullable<ConstructorParameters<typeof Stripe>[1]>['apiVersion']
return new Stripe(secretKey, {
apiVersion: '2026-03-25.dahlia' as StripeApiVersion,
httpClient: Stripe.createFetchHttpClient(),
...hostConfig,
})
}

export async function asyncPool<T>(limit: number, items: T[], iterator: (item: T) => Promise<void>) {
const executing = new Set<Promise<void>>()

for (const item of items) {
const task = iterator(item).finally(() => {
executing.delete(task)
})
executing.add(task)

if (executing.size >= limit)
await Promise.race(executing)
}

await Promise.all(executing)
}

export function parsePositiveInteger(value: string | null, label: string, fallback: number) {
if (value === null)
return fallback

const parsed = Number.parseInt(value, 10)
if (!Number.isInteger(parsed) || parsed < 1)
throw new Error(`${label} must be a positive integer`)

return parsed
}

export function isActionableStripeCustomerId(customerId: string | null | undefined) {
const trimmedCustomerId = customerId?.trim()
return !!trimmedCustomerId && !trimmedCustomerId.startsWith('pending_')
Comment on lines +111 to +113
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Only accept real Stripe customer IDs here.

This helper currently treats any non-empty, non-pending_ value as actionable. In scripts/backfill_stripe_customer_countries.ts, that is the last gate before stripe.customers.retrieve(...), so malformed IDs like sub_... or acct_... become avoidable failures and can abort the run. Filter explicitly to cus_ IDs.

Suggested fix
 export function isActionableStripeCustomerId(customerId: string | null | undefined) {
   const trimmedCustomerId = customerId?.trim()
-  return !!trimmedCustomerId && !trimmedCustomerId.startsWith('pending_')
+  return !!trimmedCustomerId
+    && trimmedCustomerId.startsWith('cus_')
+    && !trimmedCustomerId.startsWith('pending_')
 }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@scripts/admin_stripe_backfill_utils.ts` around lines 111 - 113, The
isActionableStripeCustomerId helper currently treats any trimmed, non-`pending_`
value as valid; change it to only accept real Stripe customer IDs by ensuring
the trimmed value both startsWith('cus_') and is not prefixed with 'pending_'.
Update the function isActionableStripeCustomerId to return true only when
customerId is non-null/defined, trimmed, startsWith('cus_'), and does not
startWith('pending_') so malformed IDs like 'sub_' or 'acct_' are rejected
before calling stripe.customers.retrieve.

}
249 changes: 249 additions & 0 deletions scripts/backfill_org_conversion_rate_trend.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
/*
* Backfill the admin "Org Conversion Rate Trend" metric.
*
* The historical paying counts in public.global_stats are Stripe-backed
* snapshots written by the admin stats cron. The raw org count was not stored,
* so this script reconstructs that denominator from public.orgs.created_at.
*
* Dry run, defaulting to the last 30 UTC calendar days:
* bun run stripe:backfill-org-conversion-rate
*
* Apply a date range:
* bun run stripe:backfill-org-conversion-rate --apply --from=2026-02-01 --to=2026-04-30
*
* Apply every stored global_stats row:
* bun run stripe:backfill-org-conversion-rate --apply --all
*/
import type { Database } from '../supabase/functions/_backend/utils/supabase.types.ts'
import process from 'node:process'
import { asyncPool, createSupabaseServiceClient, DEFAULT_ENV_FILE, getArgValue, loadEnv, parsePositiveInteger } from './admin_stripe_backfill_utils.ts'

const DEFAULT_LOOKBACK_DAYS = 30
const DEFAULT_CONCURRENCY = 10
const DEFAULT_PAGE_SIZE = 1000
const DATE_ID_REGEX = /^\d{4}-\d{2}-\d{2}$/

type SupabaseClient = ReturnType<typeof createSupabaseServiceClient>
type GlobalStatsRow = Pick<
Database['public']['Tables']['global_stats']['Row'],
'date_id' | 'paying' | 'org_conversion_rate'
>
type OrgCreatedAtRow = Pick<Database['public']['Tables']['orgs']['Row'], 'created_at'>

export interface OrgConversionRateBackfillRow {
changed: boolean
current_rate: number
date_id: string
orgs: number
paying: number
next_rate: number
}

function getDateId(targetDate = new Date()) {
return new Date(Date.UTC(targetDate.getUTCFullYear(), targetDate.getUTCMonth(), targetDate.getUTCDate())).toISOString().slice(0, 10)
}

function assertDateId(value: string, label: string) {
if (!DATE_ID_REGEX.test(value))
throw new Error(`${label} must use YYYY-MM-DD`)
return value
Comment on lines +46 to +49
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Reject impossible calendar dates, not just YYYY-MM-DD strings.

assertDateId() only checks the shape. Inputs like 2026-02-31 still pass, and --from is later used as a plain string filter in fetchGlobalStatsRows(), so the script can target the wrong range instead of failing fast.

Suggested fix
 function assertDateId(value: string, label: string) {
   if (!DATE_ID_REGEX.test(value))
     throw new Error(`${label} must use YYYY-MM-DD`)
+
+  const parsed = new Date(`${value}T00:00:00.000Z`)
+  if (Number.isNaN(parsed.getTime()) || getDateId(parsed) !== value)
+    throw new Error(`${label} must be a real UTC calendar date`)
+
   return value
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
function assertDateId(value: string, label: string) {
if (!DATE_ID_REGEX.test(value))
throw new Error(`${label} must use YYYY-MM-DD`)
return value
function assertDateId(value: string, label: string) {
if (!DATE_ID_REGEX.test(value))
throw new Error(`${label} must use YYYY-MM-DD`)
const parsed = new Date(`${value}T00:00:00.000Z`)
if (Number.isNaN(parsed.getTime()) || getDateId(parsed) !== value)
throw new Error(`${label} must be a real UTC calendar date`)
return value
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@scripts/backfill_org_conversion_rate_trend.ts` around lines 46 - 49,
assertDateId currently only checks DATE_ID_REGEX; update it to also reject
impossible calendar dates by parsing the YYYY-MM-DD parts and validating them:
split value into year, month, day, construct a Date (or use a reliable date
library) and confirm the Date is valid and that its year/month/day match the
parsed components (to catch things like 2026-02-31), and throw the same
Error(`${label} must use YYYY-MM-DD and be a valid date`) when invalid; keep the
original function name assertDateId and leave DATE_ID_REGEX in place for the
shape check so fetchGlobalStatsRows will never receive nonsensical date strings.

}

function getDefaultFromDateId(referenceDate = new Date()) {
const date = new Date(Date.UTC(referenceDate.getUTCFullYear(), referenceDate.getUTCMonth(), referenceDate.getUTCDate()))
date.setUTCDate(date.getUTCDate() - DEFAULT_LOOKBACK_DAYS + 1)
return getDateId(date)
}

function getNextDateId(dateId: string) {
const date = new Date(`${dateId}T00:00:00.000Z`)
date.setUTCDate(date.getUTCDate() + 1)
return getDateId(date)
}

function toMetricNumber(value: number | string | null | undefined) {
const numberValue = Number(value ?? 0)
return Number.isFinite(numberValue) ? numberValue : 0
}

export function calculateOrgConversionRate(paying: number | string | null | undefined, orgs: number | string | null | undefined) {
const payingCount = toMetricNumber(paying)
const orgCount = toMetricNumber(orgs)
if (orgCount <= 0)
return 0
return Number(((payingCount * 100) / orgCount).toFixed(1))
}

function buildOrgCountsByDateId(dateIds: string[], orgRows: OrgCreatedAtRow[]) {
const orgCreatedAtTimes = orgRows
.map(row => row.created_at ? Date.parse(row.created_at) : Number.NaN)
.filter(Number.isFinite)
.sort((left, right) => left - right)
const countsByDateId = new Map<string, number>()
let orgIndex = 0

for (const dateId of [...dateIds].sort()) {
const endExclusive = Date.parse(`${getNextDateId(dateId)}T00:00:00.000Z`)
while (orgIndex < orgCreatedAtTimes.length && orgCreatedAtTimes[orgIndex]! < endExclusive)
orgIndex++
countsByDateId.set(dateId, orgIndex)
}

return countsByDateId
}

export function buildOrgConversionRateBackfillRows(rows: GlobalStatsRow[], orgRows: OrgCreatedAtRow[]): OrgConversionRateBackfillRow[] {
const orgCountsByDateId = buildOrgCountsByDateId(rows.map(row => row.date_id), orgRows)

return rows.map((row) => {
const orgs = orgCountsByDateId.get(row.date_id) ?? 0
const paying = toMetricNumber(row.paying)
const currentRate = toMetricNumber(row.org_conversion_rate)
const nextRate = calculateOrgConversionRate(paying, orgs)
return {
date_id: row.date_id,
orgs,
paying,
current_rate: currentRate,
next_rate: nextRate,
changed: Math.abs(currentRate - nextRate) > 0.0001,
}
})
}

async function fetchGlobalStatsRows(supabase: SupabaseClient, fromDateId: string | null, toDateId: string | null) {
const rows: GlobalStatsRow[] = []
let offset = 0

while (true) {
let query = supabase
.from('global_stats')
.select('date_id, paying, org_conversion_rate')
.order('date_id', { ascending: true })
.range(offset, offset + DEFAULT_PAGE_SIZE - 1)

if (fromDateId)
query = query.gte('date_id', fromDateId)
if (toDateId)
query = query.lte('date_id', toDateId)

const { data, error } = await query
if (error)
throw error
if (!data?.length)
break

rows.push(...data)
if (data.length < DEFAULT_PAGE_SIZE)
break
offset += DEFAULT_PAGE_SIZE
}

return rows
}

async function fetchOrgCreatedAtRows(supabase: SupabaseClient, toDateId: string | null) {
const rows: OrgCreatedAtRow[] = []
let offset = 0

while (true) {
let query = supabase
.from('orgs')
.select('created_at')
.order('created_at', { ascending: true })
.range(offset, offset + DEFAULT_PAGE_SIZE - 1)
Comment on lines +153 to +154
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Make org pagination deterministic for backfill reads

Ordering orgs only by created_at and paginating with range(offset, ...) can skip or duplicate rows when multiple orgs share the same timestamp, because tie ordering is not stable across pages. In that case the reconstructed denominator becomes incorrect and org_conversion_rate updates are wrong for affected dates. This is especially likely on historical bulk inserts where many rows have identical created_at values; include a unique secondary sort key (or keyset pagination) to guarantee deterministic paging.

Useful? React with 👍 / 👎.


if (toDateId)
query = query.lt('created_at', `${getNextDateId(toDateId)}T00:00:00.000Z`)

const { data, error } = await query
if (error)
throw error
if (!data?.length)
break

rows.push(...data)
if (data.length < DEFAULT_PAGE_SIZE)
break
offset += DEFAULT_PAGE_SIZE
}

return rows
}

async function updateConversionRate(supabase: SupabaseClient, row: OrgConversionRateBackfillRow) {
const { error } = await supabase
.from('global_stats')
.update({ org_conversion_rate: row.next_rate })
.eq('date_id', row.date_id)

if (error)
throw error
}

async function main(args = process.argv.slice(2), runtimeEnv: Record<string, string | undefined> = process.env) {
const apply = args.includes('--apply')
const all = args.includes('--all')
const envFile = getArgValue(args, '--env-file') ?? DEFAULT_ENV_FILE
const concurrency = parsePositiveInteger(getArgValue(args, '--concurrency'), '--concurrency', DEFAULT_CONCURRENCY)
const fromDateId = all
? null
: assertDateId(getArgValue(args, '--from') ?? getDefaultFromDateId(), '--from')
const toDateId = all
? null
: assertDateId(getArgValue(args, '--to') ?? getDateId(), '--to')

if (fromDateId && toDateId && fromDateId > toDateId)
throw new Error('--from must be before or equal to --to')

const fileEnv = await loadEnv(envFile)
const env = {
...fileEnv,
...runtimeEnv,
}
const supabase = createSupabaseServiceClient(env)

const rows = await fetchGlobalStatsRows(supabase, fromDateId, toDateId)
const orgRows = await fetchOrgCreatedAtRows(supabase, toDateId)
const backfillRows = buildOrgConversionRateBackfillRows(rows, orgRows)
const changedRows = backfillRows.filter(row => row.changed)

console.log(`Loaded ${rows.length} global_stats rows`)
console.log(`Loaded ${orgRows.length} org rows for denominator reconstruction`)
console.log(`Env file: ${envFile}`)
if (all)
console.log('Scope: all global_stats rows')
else
console.log(`Scope: ${fromDateId} to ${toDateId}`)
console.log(`Rows needing update: ${changedRows.length}`)

const sampleRows = changedRows.slice(0, 10)
if (sampleRows.length > 0) {
console.log('Sample updates:')
for (const row of sampleRows)
console.log(`${row.date_id}: ${row.current_rate}% -> ${row.next_rate}% (${row.paying}/${row.orgs})`)
}

if (!apply) {
console.log('Dry run only. Pass --apply to update global_stats.')
return
}

if (changedRows.length === 0) {
console.log('Nothing to update.')
return
}

let updated = 0
await asyncPool(concurrency, changedRows, async (row) => {
await updateConversionRate(supabase, row)
updated++
if (updated % 100 === 0 || updated === changedRows.length)
console.log(`Updated ${updated}/${changedRows.length}`)
})

console.log(`Done. Updated ${updated}/${changedRows.length} org conversion rate rows.`)
Comment on lines +237 to +245
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Handle per-row update failures before continuing.

A single rejected updateConversionRate() makes asyncPool() reject immediately, but the other in-flight updates have already started. That can leave a partially applied backfill with no record of which date_ids failed. Catch and collect row-level failures here, then report them at the end.

Suggested fix
   let updated = 0
+  const failures: Array<{ date_id: string, error: string }> = []
   await asyncPool(concurrency, changedRows, async (row) => {
-    await updateConversionRate(supabase, row)
-    updated++
-    if (updated % 100 === 0 || updated === changedRows.length)
-      console.log(`Updated ${updated}/${changedRows.length}`)
+    try {
+      await updateConversionRate(supabase, row)
+      updated++
+      if (updated % 100 === 0 || updated === changedRows.length)
+        console.log(`Updated ${updated}/${changedRows.length}`)
+    }
+    catch (error) {
+      failures.push({
+        date_id: row.date_id,
+        error: error instanceof Error ? error.message : String(error),
+      })
+    }
   })
 
+  if (failures.length > 0)
+    throw new Error(`Org conversion rate backfill completed with ${failures.length} failures`)
+
   console.log(`Done. Updated ${updated}/${changedRows.length} org conversion rate rows.`)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
let updated = 0
await asyncPool(concurrency, changedRows, async (row) => {
await updateConversionRate(supabase, row)
updated++
if (updated % 100 === 0 || updated === changedRows.length)
console.log(`Updated ${updated}/${changedRows.length}`)
})
console.log(`Done. Updated ${updated}/${changedRows.length} org conversion rate rows.`)
let updated = 0
const failures: Array<{ date_id: string, error: string }> = []
await asyncPool(concurrency, changedRows, async (row) => {
try {
await updateConversionRate(supabase, row)
updated++
if (updated % 100 === 0 || updated === changedRows.length)
console.log(`Updated ${updated}/${changedRows.length}`)
}
catch (error) {
failures.push({
date_id: row.date_id,
error: error instanceof Error ? error.message : String(error),
})
}
})
if (failures.length > 0)
throw new Error(`Org conversion rate backfill completed with ${failures.length} failures`)
console.log(`Done. Updated ${updated}/${changedRows.length} org conversion rate rows.`)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@scripts/backfill_org_conversion_rate_trend.ts` around lines 237 - 245, The
asyncPool worker currently lets a single rejected updateConversionRate() reject
the whole pool and leaves in-flight tasks untracked; instead, wrap the per-row
call to updateConversionRate(supabase, row) in a try/catch inside the asyncPool
callback, push failed row identifiers (e.g., row.date_id or the full row) into a
local failures array, increment updated only on success, and continue so other
in-flight tasks can finish; after asyncPool resolves, log or throw a summary
using failures and updated to surface which date_id(s) failed and ensure the
script exits non-zero if you need a failing CI signal.

}

if (import.meta.main)
await main()
Loading
Loading