-- Notebook cell In [0]
-- =========================================================
-- GOLD: ML - feature_ml_rating
-- Objetivo: predecir review_scores_rating (target_rating)
-- Fuentes: airbnb.silver.listings, airbnb.silver.hosts, airbnb.silver.listing_amenities
-- =========================================================

-- 1) Make sure the Gold schema exists (no-op when it is already there).
CREATE SCHEMA IF NOT EXISTS airbnb.gold;

-- 2) Build the features + target for the rating model.
--    A row is kept only when the target casts to a non-NULL INT and the price
--    casts to a DECIMAL(10,2) greater than zero.
CREATE OR REPLACE TABLE airbnb.gold.feature_ml_rating AS
WITH amenity_counts AS (
  -- Number of rows per listing in airbnb.silver.listing_amenities.
  -- NOTE(review): COUNT(*) counts rows, so duplicated amenity rows would
  -- inflate the feature; confirm the source is deduplicated per listing.
  SELECT
    bk_listing_id,
    COUNT(*) AS num_amenities
  FROM airbnb.silver.listing_amenities
  GROUP BY bk_listing_id
),
listing_features AS (
  SELECT
    -- The business key is used as the stable listing identifier.
    l.bk_listing_id                                  AS listing_id,

    -- ===== TARGET =====
    -- TRY_CAST (like every feature below) yields NULL on a malformed value
    -- instead of failing the whole statement; such rows are dropped by the
    -- final filter so the table never contains a NULL target.
    -- NOTE(review): INT truncates fractional ratings (e.g. 4.85 -> 4); confirm
    -- the source scale before relying on this as a regression target.
    TRY_CAST(l.review_scores_rating AS INT)          AS target_rating,

    -- ===== NUMERIC FEATURES =====
    TRY_CAST(l.price AS DECIMAL(10,2))               AS price,
    TRY_CAST(l.accommodates AS INT)                  AS accommodates,
    TRY_CAST(l.bedrooms AS INT)                      AS bedrooms,
    TRY_CAST(l.beds AS INT)                          AS beds,
    TRY_CAST(l.bathrooms AS DECIMAL(3,1))            AS bathrooms,
    TRY_CAST(l.number_of_reviews AS INT)             AS number_of_reviews,
    -- Listings with no amenity rows get 0 rather than NULL.
    COALESCE(a.num_amenities, 0)                     AS num_amenities,

    -- ===== CATEGORICAL / BOOLEAN FEATURES =====
    -- Known literals are mapped explicitly; anything else falls back to
    -- TRY_CAST. The result is NULL when the value is not recognised or when
    -- the listing has no matching host row.
    CASE
      WHEN CAST(h.host_is_superhost AS STRING) IN ('true','True','1')  THEN TRUE
      WHEN CAST(h.host_is_superhost AS STRING) IN ('false','False','0') THEN FALSE
      ELSE TRY_CAST(h.host_is_superhost AS BOOLEAN)
    END                                              AS host_is_superhost,

    l.property_type,
    l.room_type,
    l.country

  FROM airbnb.silver.listings AS l
  -- Hosts are joined on the surrogate key only.
  -- NOTE(review): assumes host_sk is unique in airbnb.silver.hosts; otherwise
  -- listings would fan out into several rows. TODO confirm.
  LEFT JOIN airbnb.silver.hosts AS h
    ON h.host_sk = l.host_sk
  LEFT JOIN amenity_counts AS a
    ON a.bk_listing_id = l.bk_listing_id
)
SELECT
  listing_id,
  target_rating,
  price,
  accommodates,
  bedrooms,
  beds,
  bathrooms,
  number_of_reviews,
  num_amenities,
  host_is_superhost,
  property_type,
  room_type,
  country
FROM listing_features
-- 3) Minimum quality filters, applied to the cast values.
--    `price > 0` also rejects NULL (missing or uncastable) prices.
WHERE target_rating IS NOT NULL
  AND price > 0;
