In [0]:
-- Bronze layer: Chicago food-inspection records ingested incrementally from
-- CSV files with Auto Loader (cloud_files), plus load/audit columns.
-- Streaming tables are declared with CREATE OR REFRESH; OR REPLACE is not
-- part of the streaming-table grammar.
CREATE OR REFRESH STREAMING TABLE bronze_food_inspection
TBLPROPERTIES (
  'delta.enableChangeDataFeed' = 'true'
)
AS
SELECT
  inspection_id,
  dba_name,
  aka_name,
  license_no,
  facility_type,
  risk,
  address,
  city,
  state,
  zip,
  inspection_date,
  inspection_type,
  results,
  violations,
  latitude,
  longitude,
  -- Audit columns: processing time and originating file of each row.
  current_timestamp() AS load_dt,
  _metadata.file_path AS source_file_path,
  _metadata.file_name AS source_file_name
FROM STREAM cloud_files(
  '/Volumes/workspace/food_inspection_project/chicago',
  'csv',
  map(
    'cloudFiles.inferColumnTypes', 'true',
    -- NOTE(review): this schema location sits inside the ingested directory;
    -- confirm Auto Loader does not pick up its tracking files, or drop the
    -- option and let the pipeline manage the schema location itself.
    'cloudFiles.schemaLocation', '/Volumes/workspace/food_inspection_project/chicago/bronze_food_inspection_chicago',
    'header', 'true'
  )
);
 

In [0]:
-- Silver layer: validated Chicago inspections, one row per
-- (inspection, violation). The pipe-delimited `violations` text from bronze
-- is split into individual entries, and each entry is parsed into a numeric
-- code and a description.
CREATE OR REFRESH STREAMING TABLE silver_food_inspection
TBLPROPERTIES (
  'delta.enableChangeDataFeed' = 'true'
)
AS
WITH
-- Rows that satisfy the business rules (required fields present,
-- 5-digit zip, city is Chicago, non-blank violations text).
base_filtered AS (
  SELECT
    inspection_id,
    dba_name,
    aka_name,
    license_no,
    facility_type,
    risk,
    address,
    city,
    state,
    zip,
    inspection_date,
    inspection_type,
    results,
    violations,
    latitude,
    longitude,
    load_dt,
    source_file_path,
    source_file_name
  FROM STREAM bronze_food_inspection
  WHERE
    dba_name IS NOT NULL
    AND inspection_date IS NOT NULL
    AND inspection_type IS NOT NULL
    AND zip IS NOT NULL
    AND CAST(zip AS STRING) RLIKE '^[0-9]{5}$'
    AND UPPER(city) = 'CHICAGO'
    AND results IS NOT NULL
    AND violations IS NOT NULL
    AND TRIM(violations) <> ''
),

-- One row per '|'-separated violation entry, whitespace-trimmed.
violations_exploded AS (
  SELECT
    bf.inspection_id,
    bf.dba_name,
    bf.aka_name,
    bf.license_no,
    bf.facility_type,
    bf.risk,
    bf.address,
    bf.city,
    bf.state,
    bf.zip,
    bf.inspection_date,
    bf.inspection_type,
    bf.results,
    bf.latitude,
    bf.longitude,
    bf.load_dt,
    bf.source_file_path,
    bf.source_file_name,
    TRIM(v.violation_item) AS violation_raw
  FROM base_filtered bf
  LATERAL VIEW explode(
    split(bf.violations, '\\|')
  ) v AS violation_item
),

-- Parse each entry of the form '<code>. <description>'.
violations_parsed AS (
  SELECT
    inspection_id,
    dba_name AS restaurant_name,
    aka_name,
    license_no,
    facility_type,
    risk,
    address,
    city,
    state,
    zip,
    inspection_date,
    inspection_type,
    results,
    latitude,
    longitude,
    load_dt,
    source_file_path,
    source_file_name,
    violation_raw,
    -- regexp_extract yields '' when the entry has no leading digits; map that
    -- to NULL and use TRY_CAST so the query cannot fail under ANSI mode.
    TRY_CAST(
      NULLIF(regexp_extract(violation_raw, '^(\\d+)', 1), '') AS INT
    ) AS violation_code,
    NULLIF(
      regexp_extract(violation_raw, '^\\s*\\d+\\.\\s*(.*)$', 1),
      ''
    ) AS violation_description
  FROM violations_exploded
  WHERE
    -- violation_raw is already trimmed, so a plain emptiness check suffices.
    violation_raw IS NOT NULL
    AND violation_raw <> ''
)

-- NOTE(review): DISTINCT over a streaming source is a stateful operation;
-- confirm the resulting state growth is acceptable for this pipeline.
SELECT DISTINCT
  inspection_id,
  restaurant_name,
  aka_name,
  license_no,
  facility_type,
  risk,
  address,
  city,
  state,
  zip,
  inspection_date,
  inspection_type,
  results,
  latitude,
  longitude,
  load_dt,
  source_file_path,
  source_file_name,
  violation_raw,
  violation_code,
  violation_description
FROM violations_parsed;


In [0]:
-- Business_dim feeder: one row per distinct (license, DBA name, AKA name)
-- with a hash-based business_key.
-- Defined as a materialized view with a batch read (no STREAM): this is not
-- a streaming table, and DISTINCT should deduplicate across the whole of
-- silver_food_inspection.
CREATE OR REFRESH MATERIALIZED VIEW silver_business_chi
AS
SELECT DISTINCT
  xxhash64(license_no, restaurant_name, aka_name) AS business_key,
  license_no      AS license_number,
  restaurant_name AS dba_name,
  aka_name
FROM silver_food_inspection;



In [0]:
-- Facility_dim feeder: one row per distinct facility_type with a hash key.
-- Defined as a materialized view with a batch read (no STREAM): this is not
-- a streaming table, and DISTINCT should deduplicate across the whole of
-- silver_food_inspection.
-- NOTE(review): unlike the result/risk feeders, NULL facility_type is not
-- filtered out here; confirm whether a NULL member is intended.
CREATE OR REFRESH MATERIALIZED VIEW silver_facility_chi
AS
SELECT DISTINCT
  xxhash64(facility_type) AS facility_key,
  facility_type
FROM silver_food_inspection;



In [0]:
-- Inspection_dim feeder: one row per distinct
-- (inspection_id, inspection_date, inspection_type).
-- Defined as a materialized view with a batch read (no STREAM): this is not
-- a streaming table, and DISTINCT should deduplicate across the whole of
-- silver_food_inspection.
CREATE OR REFRESH MATERIALIZED VIEW silver_inspection_chi
AS
SELECT DISTINCT
  inspection_id,
  inspection_date,
  inspection_type
FROM silver_food_inspection;



In [0]:

-- Result_dim feeder (Chicago): one row per distinct non-NULL inspection
-- result with a hash key.
-- Defined as a materialized view with a batch read (no STREAM): this is not
-- a streaming table, and DISTINCT should deduplicate across the whole of
-- silver_food_inspection.
CREATE OR REFRESH MATERIALIZED VIEW silver_result_chi
AS
SELECT DISTINCT
  xxhash64(results) AS result_key,
  results           AS result
FROM silver_food_inspection
WHERE results IS NOT NULL;



In [0]:
-- Risk_dim feeder: one row per distinct non-NULL risk value with a hash key.
-- Defined as a materialized view with a batch read (no STREAM): this is not
-- a streaming table, and DISTINCT should deduplicate across the whole of
-- silver_food_inspection.
CREATE OR REFRESH MATERIALIZED VIEW silver_risk_chi
AS
SELECT DISTINCT
  xxhash64(risk) AS risk_key,
  risk           AS risk_level
FROM silver_food_inspection
WHERE risk IS NOT NULL;



In [0]:
-- Violation_dim feeder: distinct (violation_code, violation_description)
-- pairs where both parts were parsed successfully.
-- Defined as a materialized view with a batch read (no STREAM): this is not
-- a streaming table, and DISTINCT should deduplicate across the whole of
-- silver_food_inspection.
-- NOTE(review): violation_key is the bare code, so it is unique only if each
-- code always carries the same description text; if descriptions include
-- per-inspection comments, this yields several rows per key. Confirm.
CREATE OR REFRESH MATERIALIZED VIEW silver_violation_chi
AS
SELECT DISTINCT
  violation_code       AS violation_key,
  violation_description
FROM silver_food_inspection
WHERE
  violation_code IS NOT NULL
  AND violation_description IS NOT NULL;

