From 569e27b59f8a6f3da3de928309556834a0457bad Mon Sep 17 00:00:00 2001 From: StockerMC <44980366+StockerMC@users.noreply.github.com> Date: Sun, 31 May 2026 11:18:30 -0400 Subject: [PATCH] chore(db): regenerate accurate schema.sql + add schema redesign proposal The committed schema.sql was stale (predated most tables/columns, had a 'FIXME: actually update our DB' note). Regenerate it from the live database with every table/column/index/FK/function, annotated by real-world status (ACTIVE / LEGACY / DEAD / WORKER). Add docs/schema-redesign.md: evaluates how the DB is used today (three generations of the matching pipeline, merchant/creator identity split across multiple tables, dead + worker tables in public) and proposes a cleaner 18->11 table target with a phased migration plan. --- backend/data/schema.sql | 601 ++++++++++++++++++++++++++++++---------- docs/schema-redesign.md | 178 ++++++++++++ 2 files changed, 636 insertions(+), 143 deletions(-) create mode 100644 docs/schema-redesign.md diff --git a/backend/data/schema.sql b/backend/data/schema.sql index f851d83..7d0e4d4 100644 --- a/backend/data/schema.sql +++ b/backend/data/schema.sql @@ -1,144 +1,459 @@ -CREATE TABLE youtube_shorts ( - id UUID PRIMARY KEY, - youtube_id TEXT NOT NULL, - title TEXT, - showcase_images TEXT, - products JSONB, - created_at TIMESTAMPTZ DEFAULT NOW(), - main_image_url TEXT -); - --- FIXME: ACTUALLY UPDATE OUR DB TO CONTAIN THIS FOR SHOPIFY PRODUCTS (WILL DO LATER) -CREATE TABLE shopify_products ( - id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), - shopify_id BIGINT UNIQUE NOT NULL, - store_url TEXT NOT NULL, - title TEXT NOT NULL, - handle TEXT NOT NULL, - body_html TEXT, - vendor TEXT, - product_type TEXT, - tags TEXT[], - variants JSONB NOT NULL, - images JSONB NOT NULL, - options JSONB, - published_at TIMESTAMPTZ, - created_at TIMESTAMPTZ DEFAULT NOW(), - updated_at TIMESTAMPTZ DEFAULT NOW(), - match_count INTEGER DEFAULT 0, - last_matched_at TIMESTAMPTZ, - UNIQUE(shopify_id, store_url) -); - --- Product matches table to track which videos matched which products -CREATE TABLE product_matches ( - id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), - product_id UUID REFERENCES shopify_products(id) ON DELETE CASCADE, - short_id UUID REFERENCES youtube_shorts(id) ON DELETE CASCADE, - match_score FLOAT, - matched_at TIMESTAMPTZ DEFAULT NOW(), - UNIQUE(product_id, short_id) -); - --- Index for faster queries -CREATE INDEX idx_shopify_products_store_url ON shopify_products(store_url); -CREATE INDEX idx_shopify_products_vendor ON shopify_products(vendor); -CREATE INDEX idx_shopify_products_product_type ON shopify_products(product_type); -CREATE INDEX idx_product_matches_product_id ON product_matches(product_id); -CREATE INDEX idx_product_matches_short_id ON product_matches(short_id); - --- END OF NEW STUFF - --- Shopify OAuth tokens table -CREATE TABLE shopify_oauth_tokens ( - id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), - company_id UUID REFERENCES companies(id) ON DELETE CASCADE, - shop_domain TEXT NOT NULL UNIQUE, - access_token TEXT NOT NULL, - scope TEXT NOT NULL, - token_type TEXT DEFAULT 'offline', - created_at TIMESTAMPTZ DEFAULT NOW(), - updated_at TIMESTAMPTZ DEFAULT NOW(), - expires_at TIMESTAMPTZ, - is_active BOOLEAN DEFAULT TRUE, - UNIQUE(company_id, shop_domain) -); - --- Shopify shop information table -CREATE TABLE shopify_shops ( - id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), - company_id UUID REFERENCES companies(id) ON DELETE CASCADE, - shop_domain TEXT NOT NULL UNIQUE, - shop_id BIGINT, - shop_name TEXT, - shop_owner TEXT, - email TEXT, - domain TEXT, - country TEXT, - currency TEXT, - timezone TEXT, - iana_timezone TEXT, - plan_name TEXT, - plan_display_name TEXT, - shop_created_at TIMESTAMPTZ, - province TEXT, - city TEXT, - address1 TEXT, - zip TEXT, - phone TEXT, - latitude DECIMAL, - longitude DECIMAL, - primary_locale TEXT, - money_format TEXT, - money_with_currency_format TEXT, - weight_unit TEXT, - myshopify_domain TEXT, - created_at TIMESTAMPTZ DEFAULT NOW(), - updated_at TIMESTAMPTZ DEFAULT NOW(), - last_synced_at TIMESTAMPTZ, - UNIQUE(company_id, shop_domain) -); - --- OAuth state management (for CSRF protection) -CREATE TABLE shopify_oauth_states ( - id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), - state TEXT NOT NULL UNIQUE, - company_id UUID REFERENCES companies(id) ON DELETE CASCADE, - shop_domain TEXT NOT NULL, - redirect_url TEXT, - created_at TIMESTAMPTZ DEFAULT NOW(), - expires_at TIMESTAMPTZ DEFAULT NOW() + INTERVAL '10 minutes', - used BOOLEAN DEFAULT FALSE -); - --- Indexes for faster queries -CREATE INDEX idx_shopify_oauth_tokens_company_id ON shopify_oauth_tokens(company_id); -CREATE INDEX idx_shopify_oauth_tokens_shop_domain ON shopify_oauth_tokens(shop_domain); -CREATE INDEX idx_shopify_oauth_tokens_is_active ON shopify_oauth_tokens(is_active); -CREATE INDEX idx_shopify_shops_company_id ON shopify_shops(company_id); -CREATE INDEX idx_shopify_shops_shop_domain ON shopify_shops(shop_domain); -CREATE INDEX idx_shopify_oauth_states_state ON shopify_oauth_states(state); -CREATE INDEX idx_shopify_oauth_states_expires_at ON shopify_oauth_states(expires_at); - --- Function to clean up expired OAuth states -CREATE OR REPLACE FUNCTION cleanup_expired_oauth_states() -RETURNS void AS $$ -BEGIN - DELETE FROM shopify_oauth_states WHERE expires_at < NOW(); -END; -$$ LANGUAGE plpgsql; - --- Link shopify_products to shopify_shops -ALTER TABLE shopify_products ADD COLUMN IF NOT EXISTS shop_id UUID REFERENCES shopify_shops(id) ON DELETE CASCADE; -CREATE INDEX IF NOT EXISTS idx_shopify_products_shop_id ON shopify_products(shop_id); - -CREATE TABLE partnerships ( - id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), - creator_id UUID REFERENCES auth.users(id), - company_id UUID REFERENCES companies(id), - short_id UUID REFERENCES youtube_shorts(id), - status TEXT DEFAULT 'pending', -- e.g., pending, confirmed, rejected - created_at TIMESTAMPTZ DEFAULT NOW(), - confirmed_at TIMESTAMPTZ, - UNIQUE(creator_id, company_id, short_id) +-- ============================================================================= +-- Maatchaa — current database schema (public schema) +-- +-- This file is a faithful snapshot of the LIVE Supabase database, regenerated +-- from the running instance. The previous version of this file was stale (it +-- predated most tables/columns and carried a "FIXME: actually update our DB" +-- note). A fresh database created from this file should structurally match prod. +-- +-- Tables are annotated with their real-world status: +-- [ACTIVE] used by the running app (frontend and/or backend) +-- [LEGACY] older pipeline, ~0 rows, still referenced in backend/API.py +-- [DEAD] no code references and no data — safe drop candidate +-- [WORKER] background-pipeline scratch/staging (ideally not in `public`) +-- +-- A proposed cleaner schema and a phased migration plan live in +-- docs/schema-redesign.md. THIS file documents reality as-is; it does not +-- redesign anything. +-- +-- RLS: row level security is enabled on every table here (see the RLS section +-- at the bottom, and backend/data/enable_rls_security.sql for the migration +-- that introduced it). +-- ============================================================================= + +-- Extensions in use --------------------------------------------------------- -- +create extension if not exists "uuid-ossp"; -- uuid_generate_v4() +create extension if not exists "pgcrypto"; -- gen_random_uuid() + +-- Enums --------------------------------------------------------------------- -- +do $$ begin + create type match_status as enum ('awaiting', 'success', 'rejected'); +exception when duplicate_object then null; end $$; + + +-- ============================================================================= +-- IDENTITY +-- ============================================================================= + +-- [ACTIVE] Merchant / Shopify store owner. NOTE: this is one of THREE tables +-- describing a merchant (also shopify_shops + shopify_oauth_tokens); access_token +-- is duplicated between companies and shopify_oauth_tokens. See redesign doc. +create table companies ( + company_id uuid primary key default gen_random_uuid(), + shop_name text not null, + access_token text not null, -- SENSITIVE + ingested boolean not null default false, + last_ingest_attempt timestamptz, + keywords text[], + created_at timestamptz not null default timezone('utc', now()), + updated_at timestamptz +); + +-- [ACTIVE] Shopify store metadata (rich profile). Separate from `companies`. +create table shopify_shops ( + id uuid primary key default uuid_generate_v4(), + company_id uuid, + shop_domain text not null unique, + shop_id bigint, + shop_name text, + shop_owner text, + email text, + domain text, + country text, + currency text, + timezone text, + iana_timezone text, + plan_name text, + plan_display_name text, + shop_created_at timestamptz, + province text, + city text, + address1 text, + zip text, + phone text, + latitude numeric, + longitude numeric, + primary_locale text, + money_format text, + money_with_currency_format text, + weight_unit text, + myshopify_domain text, + last_synced_at timestamptz, + created_at timestamptz default now(), + updated_at timestamptz default now() +); +create index idx_shopify_shops_company_id on shopify_shops (company_id); +create index idx_shopify_shops_shop_domain on shopify_shops (shop_domain); + +-- [ACTIVE] Shopify OAuth access tokens. access_token also lives on companies. +create table shopify_oauth_tokens ( + id uuid primary key default uuid_generate_v4(), + company_id uuid, + shop_domain text not null unique, + access_token text not null, -- SENSITIVE + scope text not null, + token_type text default 'offline', + is_active boolean default true, + products_synced boolean default false, + last_product_sync timestamptz, + product_count integer default 0, + expires_at timestamptz, + created_at timestamptz default now(), + updated_at timestamptz default now() +); +create index idx_shopify_oauth_tokens_company_id on shopify_oauth_tokens (company_id); +create index idx_shopify_oauth_tokens_shop_domain on shopify_oauth_tokens (shop_domain); +create index idx_shopify_oauth_tokens_is_active on shopify_oauth_tokens (is_active); + +-- [ACTIVE] Short-lived CSRF state for the Shopify OAuth handshake. +create table shopify_oauth_states ( + id uuid primary key default uuid_generate_v4(), + state text not null unique, + company_id uuid, + shop_domain text not null, + redirect_url text, + used boolean default false, + created_at timestamptz default now(), + expires_at timestamptz default now() + interval '10 minutes' +); +create index idx_shopify_oauth_states_state on shopify_oauth_states (state); +create index idx_shopify_oauth_states_company_id on shopify_oauth_states (company_id); +create index idx_shopify_oauth_states_expires_at on shopify_oauth_states (expires_at); + +-- [ACTIVE] Creator (YouTube) OAuth tokens. Creator identity is otherwise smeared +-- across creator_videos / partnerships as loose columns (no `creators` table). +create table creator_tokens ( + id uuid primary key default uuid_generate_v4(), + channel_id text not null unique, + email text not null, + access_token text not null, -- SENSITIVE + refresh_token text not null, -- SENSITIVE + expires_at timestamptz not null, + created_at timestamptz default now(), + updated_at timestamptz default now() +); +create index creator_tokens_channel_id_idx on creator_tokens (channel_id); +create index creator_tokens_email_idx on creator_tokens (email); + + +-- ============================================================================= +-- CATALOG +-- ============================================================================= + +-- [ACTIVE] The product catalog the live app uses (thin: title/desc/image/price +-- + embeddings). 49 rows in prod. +create table company_products ( + id uuid primary key default uuid_generate_v4(), + company_id uuid not null, + shop_domain text not null, + title text not null, + description text, + image text, + price numeric, + pinecone_id text, + search_keywords jsonb default '[]'::jsonb, -- pre-generated YT search keywords + synced_at timestamptz default now(), + updated_at timestamptz default now() +); +create index idx_company_products_company_id on company_products (company_id); +create index idx_company_products_shop on company_products (shop_domain); +create index idx_company_products_search_keywords on company_products using gin (search_keywords); + +-- [LEGACY] Rich Shopify product table (variants/images/vendor/handle). Wired in +-- backend/API.py but holds 0 rows — superseded by company_products. +create table shopify_products ( + id uuid primary key default uuid_generate_v4(), + shop_id uuid references shopify_shops(id) on delete cascade, + shopify_id bigint not null unique, + store_url text not null, + title text not null, + handle text not null, + body_html text, + vendor text, + product_type text, + tags text[], + variants jsonb not null, + images jsonb not null, + options jsonb, + published_at timestamptz, + last_matched_at timestamptz, + match_count integer default 0, + created_at timestamptz default now(), + updated_at timestamptz default now() +); +create index idx_shopify_products_shop_id on shopify_products (shop_id); +create index idx_shopify_products_store_url on shopify_products (store_url); +create index idx_shopify_products_vendor on shopify_products (vendor); +create index idx_shopify_products_product_type on shopify_products (product_type); + +-- [ACTIVE] Creator videos discovered/analysed by the pipeline. 23 rows. +create table creator_videos ( + id uuid primary key default uuid_generate_v4(), + video_id text not null unique, -- YouTube video id + url text not null, + title text not null, + description text, + thumbnail text, + channel_title text not null, + channel_id text not null, + email text, + published_at timestamptz, + views integer default 0, + likes integer default 0, + analysis jsonb, + pinecone_id text, + indexed_at timestamptz default now() ); +create index idx_creator_videos_video_id on creator_videos (video_id); +create index idx_creator_videos_channel_id on creator_videos (channel_id); +create index idx_creator_videos_indexed_at on creator_videos (indexed_at desc); + + +-- ============================================================================= +-- MATCHING & WORKFLOW +-- ============================================================================= + +-- [ACTIVE] The live matching table: product <-> creator video. 31 rows. +create table product_creator_matches ( + id uuid primary key default uuid_generate_v4(), + product_id uuid references company_products(id) on delete cascade, + video_id text references creator_videos(video_id) on delete cascade, + source_keyword text, + similarity_score numeric, + relevance_score numeric, + relevance_reasoning text, + created_at timestamptz default now() +); +create index idx_matches_product on product_creator_matches (product_id); +create index idx_matches_video on product_creator_matches (video_id); +create index idx_matches_score on product_creator_matches (similarity_score desc); + +-- [LEGACY] v1 matching table. No real .table()/.from() calls; 0 rows. +-- DROP candidate (see docs/schema-redesign.md, Phase 1). +create table matches ( + match_id uuid primary key default gen_random_uuid(), + company_id uuid default gen_random_uuid() + references companies(company_id) on update cascade on delete cascade, + video_url text, + status match_status, + created_at timestamptz default now() +); +create index idx_company_video_url on matches (company_id, video_url); + +-- [LEGACY] v2 matching table (Shorts-centric). Referenced in backend/API.py; 0 rows. +create table product_matches ( + id uuid primary key default uuid_generate_v4(), + product_id uuid references shopify_products(id) on delete cascade, + short_id uuid references youtube_shorts(id) on delete cascade, + match_score double precision, + matched_at timestamptz default now(), + unique (product_id, short_id) +); +create index idx_product_matches_product_id on product_matches (product_id); +create index idx_product_matches_short_id on product_matches (short_id); + +-- [ACTIVE] Per-merchant reel swipe history (dismissed / partnered). 15 rows. +-- The UNIQUE(company_id, video_id) backs the upsert in +-- frontend/src/app/api/reels/interactions (onConflict: company_id,video_id). +create table reel_interactions ( + id uuid primary key default uuid_generate_v4(), + company_id uuid not null, + video_id text not null, -- references creator_videos.video_id + interaction_type text not null, -- 'dismissed' | 'partnered' + created_at timestamptz default now(), + unique (company_id, video_id) +); +create index idx_reel_interactions_company_id on reel_interactions (company_id); +create index idx_reel_interactions_video_id on reel_interactions (video_id); +create index idx_reel_interactions_type on reel_interactions (interaction_type); +create index idx_reel_interactions_created_at on reel_interactions (created_at desc); + +-- [ACTIVE] CRM board: creator partnerships from discovery -> active. 14 rows. +-- Intentionally denormalized — creator_*/video_* are SNAPSHOT strings (no FKs to +-- creators/videos). The redesign keeps the snapshot but adds optional FKs. +create table partnerships ( + id uuid primary key default uuid_generate_v4(), + company_id uuid not null, + -- creator snapshot (strings, not FKs) + creator_name text, + creator_handle text, + creator_email text, + creator_avatar text, + creator_channel_url text, + -- video snapshot + video_id text, + video_title text, + video_url text, + video_thumbnail text, + -- workflow + status text default 'to_contact', -- to_contact|contacted|in_discussion|active|closed + email_sent boolean default false, + email_draft text, + contract_drafted boolean default false, + contract_sent boolean default false, + contract_signed boolean default false, + contract_url text, + contract_data jsonb, + affiliate_link_generated boolean default false, + affiliate_link text, + discount_code text, + commission_rate numeric, + payment_terms text, + notes text, + matched_products jsonb default '[]'::jsonb, + -- analytics + views integer default 0, + likes integer default 0, + comments integer default 0, + clicks integer default 0, + sales integer default 0, + revenue numeric default 0.00, + performance_data jsonb default '{}'::jsonb, + -- timestamps + contacted_at timestamptz, + discussion_started_at timestamptz, + activated_at timestamptz, + closed_at timestamptz, + last_contact_date timestamptz, + created_at timestamptz default now(), + updated_at timestamptz default now() +); +create index idx_partnerships_company_id on partnerships (company_id); +create index idx_partnerships_status on partnerships (status); +create index idx_partnerships_created_at on partnerships (created_at desc); +create index idx_partnerships_creator_email on partnerships (creator_email) where creator_email is not null; + +-- [ACTIVE] Public showcase page for a confirmed short. 1 row. Backs the public +-- /product/[slug] route. creator_id references auth.users, which the app does +-- NOT populate today (auth is mocked). +create table youtube_shorts ( + id uuid primary key default uuid_generate_v4(), + youtube_id text not null unique, + creator_id uuid references auth.users(id), + slug text unique default random_slug_6(), + title text not null, + status text default 'pending' + check (status in ('pending', 'confirmed', 'rejected')), + products jsonb, + showcase_images text[], + main_image_url text, + reviews bigint, + email text, + confirmed_at timestamp, + created_at timestamptz default now() +); + +-- [DEAD] Public partnership page. Zero references in any file; 0 rows. +-- DROP candidate (see docs/schema-redesign.md, Phase 1). +create table product_pages ( + product_page_id uuid primary key default gen_random_uuid(), + partnership_id uuid unique default gen_random_uuid(), + page_slug text not null unique, + title text, + products jsonb, + main_image_url text, + created_at timestamptz +); + + +-- ============================================================================= +-- SUPPORT +-- ============================================================================= + +-- [ACTIVE] Landing-page waitlist signups (INSERT-only from the browser). +create table waitlist ( + id uuid primary key default gen_random_uuid(), + email text not null unique, + name text, + created_at timestamptz default now() +); +create index idx_waitlist_email on waitlist (email); +create index idx_waitlist_created_at on waitlist (created_at desc); + + +-- ============================================================================= +-- WORKER / STAGING (ideally lives in a separate `worker` schema, not `public`) +-- ============================================================================= + +-- [WORKER] Append-only URL dedup log for the discovery worker. 31 rows. +-- Written but never read back by application code. +create table yt_shorts_all ( + id uuid primary key, + url text, + created_at timestamptz default now() +); + +-- [WORKER] Staging row for a short pending processing. 0 rows. Note the +-- hardcoded personal-email default on `email` — pipeline cruft. +create table yt_shorts_pending ( + id uuid primary key default gen_random_uuid(), + company text default ''::text, + channel_id text, + short_id text, + yt_short_url text, + product_text text[], + product_imgs text[], + cached_query text, + email text not null default '"aayankarmali@gmail.com"'::text, + created_at timestamptz default now() +); + + +-- ============================================================================= +-- FUNCTIONS (search_path pinned — see enable_rls_security.sql) +-- ============================================================================= + +create or replace function cleanup_expired_oauth_states() +returns void +language plpgsql +set search_path = public +as $$ +begin + delete from shopify_oauth_states where expires_at < now(); +end; +$$; + +create or replace function random_slug_6() +returns text +language sql +set search_path = public +as $$ + select string_agg( + substr('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', + floor(random() * 62 + 1)::int, 1), '') + from generate_series(1, 6); +$$; + +create or replace function update_updated_at_column() +returns trigger +language plpgsql +set search_path = public +as $$ +begin + new.updated_at = timezone('utc'::text, now()); + return new; +end; +$$; + +create or replace function update_partnerships_updated_at() +returns trigger +language plpgsql +set search_path = public +as $$ +begin + new.updated_at = now(); + return new; +end; +$$; + + +-- ============================================================================= +-- ROW LEVEL SECURITY +-- See backend/data/enable_rls_security.sql for the authoritative, commented +-- definition (enable RLS on every table + minimal anon policies). Summary: +-- anon SELECT : creator_videos, product_creator_matches, youtube_shorts, +-- yt_shorts_pending, reel_interactions +-- anon DELETE : reel_interactions +-- anon INSERT : waitlist +-- everything else => server-only (service-role bypasses RLS) +-- ============================================================================= diff --git a/docs/schema-redesign.md b/docs/schema-redesign.md new file mode 100644 index 0000000..e181c44 --- /dev/null +++ b/docs/schema-redesign.md @@ -0,0 +1,178 @@ +# Database schema redesign + +Status: **proposal** — nothing here has been executed. The live schema is captured +as-is in [`backend/data/schema.sql`](../backend/data/schema.sql). This document +evaluates how the database is used today and proposes a cleaner target, plus a +phased, low-risk migration path. + +Row counts below are from the live DB (2026-05-31); usage is from grepping +`.from("…")` / `.table("…")` across `frontend/src` and `backend`. + +--- + +## 1. What's wrong today + +### 1a. Three generations of the same matching pipeline + +The same "match a product to a piece of creator content" concept exists three +times. Only the third is alive. + +| Gen | Product table | Match table | Content table | Rows (P / M / C) | Status | +|-----|---------------|-------------|---------------|------------------|--------| +| v1 | — | `matches` | (video_url string) | – / 0 / – | dead | +| v2 | `shopify_products` | `product_matches` | `youtube_shorts` | 0 / 0 / 1 | legacy | +| **v3 (live)** | **`company_products`** | **`product_creator_matches`** | **`creator_videos`** | **49 / 31 / 23** | **active** | + +The demo runs entirely on v3. v1/v2 are still referenced in `backend/API.py` but +hold ~0 rows. + +### 1b. A merchant is spread across three tables + +`companies`, `shopify_shops`, and `shopify_oauth_tokens` all describe one +merchant. The Shopify **access token is stored twice** (`companies.access_token` +*and* `shopify_oauth_tokens.access_token`). `shop_domain` is the real natural key +everywhere, but `company_id` (sometimes a FK, sometimes a bare UUID) is used to +join. + +### 1c. A creator is not an entity + +Creator data is smeared across four tables as loose columns: + +- `creator_videos` → `channel_id`, `channel_title`, `email` +- `creator_tokens` → `channel_id` + OAuth secrets +- `partnerships` → `creator_name`, `creator_handle`, `creator_email`, + `creator_avatar` as **plain strings** +- `youtube_shorts.creator_id` → FK to `auth.users`, which the app never + populates (auth is mocked — see the auth-is-mocked note) + +There is no single `creators` row to join to. + +### 1d. Two product tables + +`shopify_products` (rich: variants/images/vendor/handle, **0 rows**) vs +`company_products` (thin: title/desc/image/price + embeddings, **49 rows, +live**). The thin one won. + +### 1e. Two dead "public page" concepts + +`product_pages` (**0 rows, 0 references anywhere**) and `youtube_shorts` (backs +the public `/product/[slug]` page, 1 row) both model "a public page for a +partnership/short." + +### 1f. Worker scratch tables in the core schema + +`yt_shorts_all` (append-only URL dedup log, 31 rows, never read back) and +`yt_shorts_pending` (staging, 0 rows, has a hardcoded personal-email default) +are pipeline internals sitting in `public`, so they're exposed via the API. + +--- + +## 2. Proposed target (18 → 11 tables) + +Organized around real domain entities: **Merchant → Product**, **Creator → +Video**, and the **Match / Partnership** that connect them. + +``` +IDENTITY + merchants (was companies + shopify_shops + shopify_oauth_tokens) + id pk · shop_domain UNIQUE (natural key) + shop_name, email, plan, country, currency, timezone, + ingested, products_synced, product_count, keywords[] + merchant_oauth (secrets split out so RLS can expose a profile w/o tokens) + merchant_id fk→merchants · access_token, scope, expires_at, is_active + creators (NEW — consolidates creator identity) + id pk · channel_id UNIQUE · channel_title, email, avatar, channel_url + creator_oauth (was creator_tokens) + creator_id fk→creators · access_token, refresh_token, expires_at + +CATALOG + products (was company_products; absorb useful shopify_products cols) + id pk · merchant_id fk · shopify_product_id + title, description, image, price, handle, vendor, tags[], + pinecone_id, search_keywords jsonb + videos (was creator_videos) + id pk · creator_id fk→creators · youtube_id UNIQUE + url, title, thumbnail, published_at, views, likes, analysis jsonb, pinecone_id + +MATCHING & WORKFLOW + matches (was product_creator_matches; reuse the clean name once v1 dropped) + id pk · product_id fk→products · video_id fk→videos + similarity_score, relevance_score, reasoning · UNIQUE(product_id, video_id) + interactions (was reel_interactions) + id pk · merchant_id fk · video_id fk · type · UNIQUE(merchant_id, video_id) + partnerships (kept; add FKs, keep snapshot cols — see note) + id pk · merchant_id fk · creator_id fk · video_id fk + status, *_at, contract_*, affiliate_*, matched_products jsonb, analytics cols + showcase_pages (merges youtube_shorts' public-page role + product_pages) + id pk · slug UNIQUE · partnership_id fk · title, products jsonb, main_image_url, status + +SUPPORT + waitlist · shopify_oauth_states (transient CSRF) + +WORKER (move to a `worker` schema, OUT of public/the API surface) + ingest_log (was yt_shorts_all) · ingest_staging (was yt_shorts_pending) +``` + +**Dropped from `public` (7):** `matches`(v1), `product_matches`, +`shopify_products`, `youtube_shorts`, `product_pages`, `yt_shorts_all`, +`yt_shorts_pending` (last two move to a `worker` schema). + +### Why this is better +- **One merchant, one creator** → joins instead of string matching; partnerships + reference `creators`/`videos` by FK. +- **OAuth secrets isolated** in `*_oauth` tables → RLS can expose a public + profile while keeping tokens server-only (complements the RLS work already + applied — see [`enable_rls_security.sql`](../backend/data/enable_rls_security.sql)). +- **`interactions` keeps its real `UNIQUE(merchant_id, video_id)`** — the reels + upsert (`onConflict: company_id,video_id`) depends on it. +- **Staging out of `public`** → worker scratch tables stop being API-reachable. + +--- + +## 3. Two honest caveats + +1. **`partnerships` denormalization is partly intentional.** It's a CRM board; + snapshotting the creator's name/avatar at partnership time is legitimate (the + channel can change later). Add `creator_id`/`video_id` FKs for joins but + **keep** the snapshot columns. The ~1,700-line partnerships page reads those + strings directly, so fully normalizing it is a separate FE effort. + +2. **This is a migration, not a rename.** `backend/API.py` still calls v2 tables. + The backend isn't deployed and auth is mocked, so risk is low — but it's real + code surgery, best done in phases. + +--- + +## 4. Phased migration plan + +### Phase 1 — Remove confirmed-dead tables (safe, isolated) +- `DROP TABLE public.product_pages;` (0 rows, 0 references) +- `DROP TABLE public.matches;` (0 rows, only a coincidental dict key) +- No application code changes required. Apply via a dedicated migration after review. + +### Phase 2 — Retire the v2 pipeline +- Confirm `shopify_products` / `product_matches` / `youtube_shorts` are truly + unused at runtime (they're wired in `backend/API.py` but empty). +- Either delete those endpoints or repoint them at the v3 tables. +- Then drop the v2 tables. Requires backend edits + redeploy. + +### Phase 3 — Consolidate identity +- Introduce `creators` and `merchants`; backfill from existing rows + (`creator_videos`/`creator_tokens` → `creators`; `companies`+`shopify_shops`+ + `shopify_oauth_tokens` → `merchants`/`merchant_oauth`). +- Add FKs on `partnerships`, `interactions`, `products`, `videos`. +- Migrate reads/writes in frontend + backend; drop the old identity tables. + +### Phase 4 — Move worker tables out of `public` +- Create a `worker` schema; move `yt_shorts_all` → `worker.ingest_log` and + `yt_shorts_pending` → `worker.ingest_staging` (removes them from the API). + +Each phase is independently shippable and reversible up to the `DROP`. + +--- + +## 5. Recommended immediate step + +Phase 1 only: a `DROP TABLE product_pages, matches;` migration, reviewed and +applied on its own. Everything else waits until the backend is being actively +worked on, so schema churn lands alongside the code that depends on it.