# Getting the data from the Bronze Catalog

## The data used below
- It comes from the Bronze catalog.
- The schema holds the raw data files.

### Extracting the Customers Data

In [0]:
%sql
-- Peek at one raw customers file (customers_2024_10.json) by querying the
-- JSON file directly from the volume path.
SELECT
    *
FROM
    json.`/Volumes/gizmoboxcatalog/bronzeschema/rawfiles/customers/customers_2024_10.json`;

### Findings from the analysis
- The customer data has incorrect data types (date of birth).
- Some other transformations also need to be applied to the data.

In [0]:
%sql
-- Read every JSON file under the customers folder in one query.
-- The format prefix is written in lowercase (json) and the statement is
-- terminated, matching the other customers queries in this notebook.
SELECT *
FROM json.`/Volumes/gizmoboxcatalog/bronzeschema/rawfiles/customers/`;

In [0]:
%sql
-- Same folder read as before, but also expose the source file path of each
-- row (taken from the _metadata column) as file_name.
SELECT
    _metadata.file_path AS file_name,
    *
FROM
    json.`/Volumes/gizmoboxcatalog/bronzeschema/rawfiles/customers/`;

In [0]:
%sql
-- View over the raw customers JSON files.
-- Each row carries the path of the file it was read from (file_name)
-- followed by every column found in the JSON data.
CREATE OR REPLACE VIEW gizmoboxcatalog.bronzeschema.v_customers AS
SELECT
    _metadata.file_path AS file_name,
    *
FROM
    json.`/Volumes/gizmoboxcatalog/bronzeschema/rawfiles/customers/`;

In [0]:
%sql
-- Smoke test: fetch a single row to confirm the customers view resolves.
SELECT
    *
FROM
    gizmoboxcatalog.bronzeschema.v_customers
LIMIT 1;

### Extracting the Orders Data

In [0]:
%sql
-- First attempt: read the raw orders files with the JSON reader.
SELECT
    *
FROM
    json.`/Volumes/gizmoboxcatalog/bronzeschema/rawfiles/Orders/`

### As seen above, the data does not meet the required standard
- Some of the order dates are missing their surrounding double quotes (`""`).
- We need to apply some transformations.
- We need to handle null values.
- To do that, we need to read the files as text rather than as JSON.

In [0]:
%sql
-- Second attempt: read the same orders files with the text reader instead
-- of the JSON reader, so the raw content is returned unparsed.
SELECT
    *
FROM
    text.`/Volumes/gizmoboxcatalog/bronzeschema/rawfiles/Orders/`

In [0]:
%sql
-- View over the raw orders files, read with the text reader rather than
-- the JSON reader.
CREATE OR REPLACE VIEW gizmoboxcatalog.bronzeschema.v_orders AS
SELECT
    *
FROM
    text.`/Volumes/gizmoboxcatalog/bronzeschema/rawfiles/Orders/`;

-- Check the view that was just created.
SELECT
    *
FROM
    gizmoboxcatalog.bronzeschema.v_orders;

### Extracting the Address Data

In [0]:
%sql
-- First attempt: read the raw addresses files with the CSV reader and no
-- reader options.
SELECT
    *
FROM
    csv.`/Volumes/gizmoboxcatalog/bronzeschema/rawfiles/addresses/`

### As the above data does not meet the required standard, we need to
- Read the data with `READ_FILES`, so we can tell the reader that the files have a header row.
- Also parse the data correctly.

In [0]:
%sql
-- Read the addresses files through READ_FILES so reader options can be set:
-- the files are parsed as CSV, fields are separated by tabs, and the first
-- row is used as the header.
SELECT
    *
FROM
    READ_FILES(
        '/Volumes/gizmoboxcatalog/bronzeschema/rawfiles/addresses/',
        format => 'csv',
        delimiter => '\t',
        header => true
    );

In [0]:
%sql
-- View over the raw addresses files: CSV format, tab-delimited, first row
-- used as the header.
CREATE OR REPLACE VIEW gizmoboxcatalog.bronzeschema.v_addresses AS
SELECT
    *
FROM
    READ_FILES(
        '/Volumes/gizmoboxcatalog/bronzeschema/rawfiles/addresses/',
        format => 'csv',
        delimiter => '\t',
        header => true
    );

-- Check the view that was just created.
SELECT
    *
FROM
    gizmoboxcatalog.bronzeschema.v_addresses;

### Extracting the Payments Data

In [0]:
%sql
-- Step 1: Define the schema of the target table.
-- CREATE OR REPLACE rebuilds the table on every run, so re-running this cell
-- does not append the rows from Step 2 a second time.
CREATE OR REPLACE TABLE gizmoboxcatalog.bronzeschema.payments
(
  payment_id INTEGER,
  order_id INTEGER,
  payment_timestamp TIMESTAMP,
  payment_status INTEGER,
  payment_method STRING
);

-- Step 2: Load data from the CSV files in the volume.
-- The files are read with header => false, so the columns are addressed by
-- position (_c0 .. _c4) and cast to the types declared in Step 1.
-- The target columns are listed explicitly so the load does not depend on
-- the physical column order of the table.
INSERT INTO gizmoboxcatalog.bronzeschema.payments
(
  payment_id,
  order_id,
  payment_timestamp,
  payment_status,
  payment_method
)
SELECT
  CAST(_c0 AS INTEGER)        AS payment_id,
  CAST(_c1 AS INTEGER)        AS order_id,
  CAST(_c2 AS TIMESTAMP)      AS payment_timestamp,
  CAST(_c3 AS INTEGER)        AS payment_status,
  CAST(_c4 AS STRING)         AS payment_method
FROM read_files(
  '/Volumes/gizmoboxcatalog/bronzeschema/rawfiles/Payments/',
  format => 'csv',
  header => false,
  delimiter => ','
);


In [0]:
%sql
-- Check the rows loaded into the payments table.
SELECT
    *
FROM
    gizmoboxcatalog.bronzeschema.payments;

In [0]:
# %sql
# REFRESH TABLE gizmoboxcatalog.bronzeschema.payments;