### Transform Customers Data
1. Remove records with a NULL customer_id
2. Remove exact duplicate records
3. Keep only the latest record per customer, based on created_timestamp
4. CAST the column values to the correct data types
5. Write the data to a Delta table

### Remove records with NULL customer id

In [0]:
%sql
-- Preview bronze customers that have a customer_id, sorted by that id.
SELECT c.*
FROM shoppix.bronze.v_customers AS c
WHERE c.customer_id IS NOT NULL
ORDER BY c.customer_id


### Remove duplicate records

In [0]:
%sql
-- Bronze customers that have a customer_id, with fully identical rows
-- collapsed to a single row by DISTINCT, sorted by customer_id.
SELECT DISTINCT c.*
FROM shoppix.bronze.v_customers AS c
WHERE c.customer_id IS NOT NULL
ORDER BY c.customer_id

In [0]:
%sql
-- One row per customer_id, taking MAX() of every other column.
-- NOTE: each MAX() is evaluated independently, so a single result row can
-- combine values that came from different source rows. This is a per-column
-- summary, not a "pick the latest record" query.
-- Aggregates are aliased (max_ prefix) so the result columns have stable,
-- readable names instead of engine-generated ones.
SELECT c.customer_id,
       MAX(c.created_timestamp) AS max_created_timestamp,
       MAX(c.customer_name)     AS max_customer_name,
       MAX(c.date_of_birth)     AS max_date_of_birth,
       MAX(c.email)             AS max_email,
       MAX(c.member_since)      AS max_member_since,
       MAX(c.telephone)         AS max_telephone
FROM shoppix.bronze.v_customers AS c
WHERE c.customer_id IS NOT NULL
GROUP BY c.customer_id
ORDER BY c.customer_id

In [0]:
%sql
-- Temporary view over the bronze customers: rows with a NULL customer_id are
-- excluded and fully identical rows are collapsed by DISTINCT.
-- The view deliberately has no ORDER BY: a sort inside the view definition
-- is extra work whose order is not relied on once the view is grouped,
-- joined or ranked. Queries that need ordered output should add their own
-- ORDER BY.
CREATE OR REPLACE TEMPORARY VIEW v_customers_distinct
AS
SELECT DISTINCT *
  FROM shoppix.bronze.v_customers
WHERE customer_id IS NOT NULL;

In [0]:
%sql
-- Greatest created_timestamp found for each customer_id in v_customers_distinct.
SELECT d.customer_id,
       MAX(d.created_timestamp) AS max_created_timestamp
FROM v_customers_distinct AS d
GROUP BY d.customer_id;

### Remove duplicates based on created_timestamp

In [0]:
%sql
-- Keep exactly one row per customer_id: the row with the greatest
-- created_timestamp.
-- ROW_NUMBER() replaces the earlier MAX()-and-join-back approach, which
-- returned every row tied on the maximum timestamp (so duplicates survived)
-- and returned nothing for a customer whose created_timestamp values were
-- all NULL (NULL never satisfies the join's "=" comparison).
-- The columns after created_timestamp in ORDER BY are tie-breakers only, so
-- the choice among tied rows is repeatable between runs.
-- NOTE(review): ranking uses the raw view value, exactly as MAX() did; if the
-- column is stored as a string, confirm its format sorts chronologically.
SELECT t.*
FROM v_customers_distinct AS t
QUALIFY ROW_NUMBER() OVER (
            PARTITION BY t.customer_id
            ORDER BY t.created_timestamp DESC NULLS LAST,
                     t.customer_name,
                     t.date_of_birth,
                     t.email,
                     t.member_since,
                     t.telephone
        ) = 1;



### CAST the column values to the correct data type

In [0]:
%sql
-- Latest record per customer_id, with the date/time columns cast to their
-- proper types (created_timestamp -> TIMESTAMP, date_of_birth and
-- member_since -> DATE).
-- ROW_NUMBER() guarantees a single row per customer_id. The previous
-- MAX()-and-join-back form kept every row tied on the maximum timestamp and
-- returned nothing for a customer whose created_timestamp values were all NULL.
-- The columns after created_timestamp in ORDER BY are tie-breakers only, so
-- the choice among tied rows is repeatable between runs.
-- NOTE(review): ranking uses the raw (un-cast) view value, exactly as MAX()
-- did; if it is stored as a string, confirm its format sorts chronologically.
SELECT CAST(t.created_timestamp AS TIMESTAMP) AS created_timestamp,
       t.customer_id,
       t.customer_name,
       CAST(t.date_of_birth AS DATE) AS date_of_birth,
       t.email,
       CAST(t.member_since AS DATE) AS member_since,
       t.telephone
FROM v_customers_distinct AS t
QUALIFY ROW_NUMBER() OVER (
            PARTITION BY t.customer_id
            ORDER BY t.created_timestamp DESC NULLS LAST,
                     t.customer_name,
                     t.date_of_birth,
                     t.email,
                     t.member_since,
                     t.telephone
        ) = 1;


### Write data to Delta table

In [0]:
%sql
-- Build the silver customers table: one row per customer_id (the row with
-- the greatest created_timestamp), with date/time columns cast to their
-- proper types.
-- CREATE OR REPLACE makes the cell re-runnable: a plain CREATE TABLE fails
-- once the table exists. Re-running replaces the table contents with a fresh
-- build from v_customers_distinct.
-- ROW_NUMBER() guarantees a single row per customer_id. The previous
-- MAX()-and-join-back form kept every row tied on the maximum timestamp and
-- returned nothing for a customer whose created_timestamp values were all NULL.
-- The columns after created_timestamp in ORDER BY are tie-breakers only, so
-- the choice among tied rows is repeatable between runs.
-- NOTE(review): ranking uses the raw (un-cast) view value, exactly as MAX()
-- did; if it is stored as a string, confirm its format sorts chronologically.
CREATE OR REPLACE TABLE shoppix.silver.customers
AS
SELECT CAST(t.created_timestamp AS TIMESTAMP) AS created_timestamp,
       t.customer_id,
       t.customer_name,
       CAST(t.date_of_birth AS DATE) AS date_of_birth,
       t.email,
       CAST(t.member_since AS DATE) AS member_since,
       t.telephone
FROM v_customers_distinct AS t
QUALIFY ROW_NUMBER() OVER (
            PARTITION BY t.customer_id
            ORDER BY t.created_timestamp DESC NULLS LAST,
                     t.customer_name,
                     t.date_of_birth,
                     t.email,
                     t.member_since,
                     t.telephone
        ) = 1;

In [0]:
%sql
-- Inspect the rows currently stored in the silver customers table.
SELECT s.*
FROM shoppix.silver.customers AS s

###