## Create Unity Catalog (UC) functions to use as tools with the model

In [0]:
%sql

-- A) Per-user metrics (table)
--    Looks up the aggregated cycle metrics stored for a single user.
--    p_user_id: id of the user to look up. No rows are returned when the id
--    has no entry in gold_user_cycle_metrics.
CREATE OR REPLACE FUNCTION wwc2025.period_pipeline.get_user_metrics(p_user_id INT)
RETURNS TABLE (
  user_id INT,
  avg_cycle_length_days DOUBLE,
  avg_period_length_days DOUBLE,
  cycles_count INT
)
RETURN
SELECT
  ucm.user_id,
  ucm.avg_cycle_length_days,
  ucm.avg_period_length_days,
  ucm.cycles_count
FROM wwc2025.period_pipeline.gold_user_cycle_metrics AS ucm
WHERE ucm.user_id = p_user_id;

-- B) Abnormal cycles (table)
--    Lists every cycle in gold_cycles that is flagged as an abnormally long
--    period, a short interval, or both, newest cycle start first.
--    Takes no parameters. The caller is expected to apply any row limit.
CREATE OR REPLACE FUNCTION wwc2025.period_pipeline.get_abnormal_cycles()
RETURNS TABLE (
  user_id INT,
  cycle_start_date DATE,
  cycle_end_date DATE,
  period_length_days INT,
  interval_since_prev_end_days INT,
  is_abnormally_long_period BOOLEAN,
  is_short_interval BOOLEAN
)
RETURN
SELECT
  gc.user_id,
  gc.cycle_start_date,
  gc.cycle_end_date,
  gc.period_length_days,
  gc.interval_since_prev_end_days,
  gc.is_abnormally_long_period,
  gc.is_short_interval
FROM wwc2025.period_pipeline.gold_cycles AS gc
WHERE gc.is_abnormally_long_period
   OR gc.is_short_interval
ORDER BY gc.cycle_start_date DESC;

-- C) Short intervals (table)
--    Lists cycles whose gap since the previous cycle end is strictly below
--    p_max_gap_days, newest cycle start first.
--    p_max_gap_days: exclusive upper bound on interval_since_prev_end_days.
--    Rows with no recorded interval are never returned.
CREATE OR REPLACE FUNCTION wwc2025.period_pipeline.get_short_intervals(p_max_gap_days INT)
RETURNS TABLE (
  user_id INT,
  cycle_start_date DATE,
  cycle_end_date DATE,
  interval_since_prev_end_days INT
)
RETURN
SELECT
  gc.user_id,
  gc.cycle_start_date,
  gc.cycle_end_date,
  gc.interval_since_prev_end_days
FROM wwc2025.period_pipeline.gold_cycles AS gc
WHERE gc.interval_since_prev_end_days IS NOT NULL
  AND gc.interval_since_prev_end_days < p_max_gap_days
ORDER BY gc.cycle_start_date DESC;

-- D) Long bleeds (table)
--    Lists cycles whose period lasted at least p_min_days, longest period
--    first and, among equal lengths, newest cycle start first.
--    p_min_days: inclusive lower bound on period_length_days.
CREATE OR REPLACE FUNCTION wwc2025.period_pipeline.get_long_bleeds(p_min_days INT)
RETURNS TABLE (
  user_id INT,
  cycle_start_date DATE,
  cycle_end_date DATE,
  period_length_days INT
)
RETURN
SELECT
  gc.user_id,
  gc.cycle_start_date,
  gc.cycle_end_date,
  gc.period_length_days
FROM wwc2025.period_pipeline.gold_cycles AS gc
WHERE gc.period_length_days >= p_min_days
ORDER BY
  gc.period_length_days DESC,
  gc.cycle_start_date DESC;

-- E) Users by avg cycle length (table)
--    Lists users whose average cycle length falls inside the closed range
--    [p_min, p_max], longest average first.
--    p_min: inclusive lower bound in days.
--    p_max: inclusive upper bound in days.
--    An inverted range (p_min greater than p_max) matches nothing.
CREATE OR REPLACE FUNCTION wwc2025.period_pipeline.find_users_by_avg_cycle_length(p_min INT, p_max INT)
RETURNS TABLE (
  user_id INT,
  avg_cycle_length_days DOUBLE,
  avg_period_length_days DOUBLE,
  cycles_count INT
)
RETURN
SELECT
  ucm.user_id,
  ucm.avg_cycle_length_days,
  ucm.avg_period_length_days,
  ucm.cycles_count
FROM wwc2025.period_pipeline.gold_user_cycle_metrics AS ucm
WHERE ucm.avg_cycle_length_days >= p_min
  AND ucm.avg_cycle_length_days <= p_max
ORDER BY ucm.avg_cycle_length_days DESC;

-- F) Predict next period (table function returning one row for the user)
--    Adds the rounded average cycle length of the user to the latest
--    cycle_end_date found for that user in gold_cycles.
--    p_user_id: id of the user to predict for.
--    No row is returned unless the user appears in both gold_cycles and
--    gold_user_cycle_metrics. predicted_next_period is NULL when either the
--    last end date or the average length is NULL.
--    NOTE(review): a single output row presumes one metrics row per user
--    in gold_user_cycle_metrics -- TODO confirm.
CREATE OR REPLACE FUNCTION wwc2025.period_pipeline.predict_next_period_row(p_user_id INT)
RETURNS TABLE (
  user_id INT,
  last_cycle_end_date DATE,
  avg_len INT,
  predicted_next_period DATE
)
RETURN
WITH
-- Latest recorded cycle end for the requested user.
last_end AS (
  SELECT
    gc.user_id,
    MAX(gc.cycle_end_date) AS last_cycle_end_date
  FROM wwc2025.period_pipeline.gold_cycles AS gc
  WHERE gc.user_id = p_user_id
  GROUP BY gc.user_id
),
-- Average cycle length of the requested user, rounded to whole days.
avg_cycle AS (
  SELECT
    ucm.user_id,
    CAST(ROUND(ucm.avg_cycle_length_days) AS INT) AS avg_len
  FROM wwc2025.period_pipeline.gold_user_cycle_metrics AS ucm
  WHERE ucm.user_id = p_user_id
)
SELECT
  ac.user_id,
  le.last_cycle_end_date,
  ac.avg_len,
  CASE
    WHEN le.last_cycle_end_date IS NOT NULL AND ac.avg_len IS NOT NULL
      THEN DATEADD(day, ac.avg_len, le.last_cycle_end_date)
    ELSE NULL
  END AS predicted_next_period
FROM last_end AS le
INNER JOIN avg_cycle AS ac
  ON ac.user_id = le.user_id;

-- G) User wellness context (life stage, current phase, prefs, top moods/symptoms)
--    Returns the row(s) stored for one user in gold_user_wellness_context,
--    projected onto the columns declared in the RETURNS TABLE clause.
--    p_user_id: id of the user to look up. No rows are returned when the id
--    has no entry in the table.
CREATE OR REPLACE FUNCTION wwc2025.period_pipeline.get_user_wellness_context(p_user_id INT)
RETURNS TABLE (
  user_id INT,
  life_stage STRING,
  phase STRING,
  vegan BOOLEAN,
  dairy_free BOOLEAN,
  caffeine_free BOOLEAN,
  sensitive_skin BOOLEAN,
  nut_allergy BOOLEAN,
  gluten_free BOOLEAN,
  sexually_active BOOLEAN,
  temperament STRING,
  top_moods_90d ARRAY<STRING>,
  top_symptoms_90d ARRAY<STRING>
)
RETURN
SELECT
  wc.user_id,
  wc.life_stage,
  wc.phase,
  wc.vegan,
  wc.dairy_free,
  wc.caffeine_free,
  wc.sensitive_skin,
  wc.nut_allergy,
  wc.gluten_free,
  wc.sexually_active,
  wc.temperament,
  wc.top_moods_90d,
  wc.top_symptoms_90d
FROM wwc2025.period_pipeline.gold_user_wellness_context AS wc
WHERE wc.user_id = p_user_id;

-- H) Phase- & life-stage-aware wellness bundle (meals, supplements, workout, hygiene)
--    Builds a recommendation bundle for one user by matching the phase and
--    life stage from gold_user_wellness_context against
--    silver_wellness_phase_rules, then picking items from the four catalogs.
--    p_user_id: id of the user to build the bundle for.
--    p_items:   maximum number of rows returned for each of the meal,
--               supplement and hygiene sections. The workout section always
--               returns at most one row.
--    No rows are returned when the user has no context row or no rule
--    matches the user phase and life stage.
--    A NULL preference flag is treated as "no restriction", so an unknown
--    preference does not silently empty a section.
--    NOTE: Uses ROW_NUMBER() + rn filter instead of LIMIT p_items.
--    NOTE(review): ctx.nut_allergy is not applied to any catalog here, and
--    the tag that would mark nut content is not visible in this file -- TODO confirm.
CREATE OR REPLACE FUNCTION wwc2025.period_pipeline.recommend_wellness_bundle(p_user_id INT, p_items INT)
RETURNS TABLE (
  user_id INT,
  phase STRING,
  life_stage STRING,
  section STRING,   -- "meal"|"supplement"|"workout"|"hygiene"
  id STRING,
  name STRING,
  meta STRING       -- JSON (tags, price/intensity/type/etc. as applicable)
)
COMMENT 'Recommends a wellness bundle for one user: up to p_items meals, supplements and hygiene items plus at most one workout, chosen from the catalogs using the rules for the current phase and life stage of the user. Each row carries a section label, the item id and name, and a JSON meta string.'
RETURN
WITH
-- Context of the requested user, limited to the columns used below.
ctx AS (
  SELECT
    user_id,
    phase,
    life_stage,
    vegan,
    dairy_free,
    gluten_free,
    caffeine_free,
    sensitive_skin
  FROM wwc2025.period_pipeline.gold_user_wellness_context
  WHERE user_id = p_user_id
),
-- Phase and life-stage rules, limited to the columns used below.
rules AS (
  SELECT
    phase,
    life_stage,
    prefer_meal_tags,
    prefer_supp_tags,
    workout_intensity,
    hygiene_types
  FROM wwc2025.period_pipeline.silver_wellness_phase_rules
),

-- Meals (rank by tag overlap, respect diet prefs)
m_ranked AS (
  SELECT
    c.user_id, c.phase, c.life_stage,
    'meal' AS section,
    mc.meal_id AS id, mc.name,
    TO_JSON(NAMED_STRUCT('tags', mc.tags)) AS meta,
    -- meal_id is the last sort key so that name ties rank deterministically
    ROW_NUMBER() OVER (
      ORDER BY SIZE(ARRAY_INTERSECT(mc.tags, r.prefer_meal_tags)) DESC, mc.name ASC, mc.meal_id ASC
    ) AS rn
  FROM ctx AS c
  INNER JOIN rules AS r
    ON r.phase = c.phase AND r.life_stage = c.life_stage
  -- every catalog row is a candidate, the WHERE clause does the filtering
  CROSS JOIN wwc2025.period_pipeline.silver_meal_catalog AS mc
  WHERE SIZE(ARRAY_INTERSECT(mc.tags, r.prefer_meal_tags)) > 0
    AND ( NOT COALESCE(c.vegan, FALSE)       OR ARRAY_CONTAINS(mc.tags, 'vegan') )
    AND ( NOT COALESCE(c.gluten_free, FALSE) OR ARRAY_CONTAINS(mc.tags, 'gluten-free') )
    AND ( NOT COALESCE(c.dairy_free, FALSE)  OR NOT ARRAY_CONTAINS(mc.tags, 'dairy') )
),
m AS (
  SELECT user_id, phase, life_stage, section, id, name, meta FROM m_ranked WHERE rn <= p_items
),

-- Supplements (rank by overlap, respect caffeine-free pref)
-- A rule with an empty prefer_supp_tags array admits every supplement.
s_ranked AS (
  SELECT
    c.user_id, c.phase, c.life_stage,
    'supplement' AS section,
    sc.supplement_id AS id, sc.name,
    TO_JSON(NAMED_STRUCT('tags', sc.tags)) AS meta,
    ROW_NUMBER() OVER (
      ORDER BY SIZE(ARRAY_INTERSECT(sc.tags, r.prefer_supp_tags)) DESC, sc.name ASC, sc.supplement_id ASC
    ) AS rn
  FROM ctx AS c
  INNER JOIN rules AS r
    ON r.phase = c.phase AND r.life_stage = c.life_stage
  CROSS JOIN wwc2025.period_pipeline.silver_supplement_catalog AS sc
  WHERE (SIZE(ARRAY_INTERSECT(sc.tags, r.prefer_supp_tags)) > 0 OR SIZE(r.prefer_supp_tags) = 0)
    AND ( NOT COALESCE(c.caffeine_free, FALSE) OR ARRAY_CONTAINS(sc.tags, 'caffeine-free') )
),
s AS (
  SELECT user_id, phase, life_stage, section, id, name, meta FROM s_ranked WHERE rn <= p_items
),

-- Workout (one by rule: first by name among those matching the rule intensity)
w_ranked AS (
  SELECT
    c.user_id, c.phase, c.life_stage,
    'workout' AS section,
    wc.workout_id AS id, wc.name,
    TO_JSON(NAMED_STRUCT('intensity', wc.intensity, 'focus', wc.focus, 'tags', wc.tags)) AS meta,
    ROW_NUMBER() OVER (ORDER BY wc.name ASC, wc.workout_id ASC) AS rn
  FROM ctx AS c
  INNER JOIN rules AS r
    ON r.phase = c.phase AND r.life_stage = c.life_stage
  CROSS JOIN wwc2025.period_pipeline.silver_workout_catalog AS wc
  WHERE wc.intensity = r.workout_intensity
),
w AS (
  SELECT user_id, phase, life_stage, section, id, name, meta FROM w_ranked WHERE rn <= 1
),

-- Hygiene (respect sensitive-skin + rule types)
h_ranked AS (
  SELECT
    c.user_id, c.phase, c.life_stage,
    'hygiene' AS section,
    hc.hygiene_id AS id, hc.name,
    TO_JSON(NAMED_STRUCT('type', hc.type, 'tags', hc.tags)) AS meta,
    ROW_NUMBER() OVER (ORDER BY hc.name ASC, hc.hygiene_id ASC) AS rn
  FROM ctx AS c
  INNER JOIN rules AS r
    ON r.phase = c.phase AND r.life_stage = c.life_stage
  CROSS JOIN wwc2025.period_pipeline.silver_hygiene_catalog AS hc
  WHERE ARRAY_CONTAINS(r.hygiene_types, hc.type)
    AND ( NOT COALESCE(c.sensitive_skin, FALSE) OR ARRAY_CONTAINS(hc.tags, 'sensitive-skin') )
),
h AS (
  SELECT user_id, phase, life_stage, section, id, name, meta FROM h_ranked WHERE rn <= p_items
)

SELECT user_id, phase, life_stage, section, id, name, meta FROM m
UNION ALL
SELECT user_id, phase, life_stage, section, id, name, meta FROM s
UNION ALL
SELECT user_id, phase, life_stage, section, id, name, meta FROM w
UNION ALL
SELECT user_id, phase, life_stage, section, id, name, meta FROM h;
