## Data Enrichment

#### Creating Tables

In [0]:
---DROP TABLE IF EXISTS digital_funnel.gold.clientesfunnel;
---DROP TABLE IF EXISTS digital_funnel.gold.pv_clientesfunnel;

In [0]:
-- Gold table for the JOIN between the client catalog and the funnel data.
-- Holds the client attributes together with the funnel stage columns and the
-- quality flag computed during enrichment.
CREATE TABLE IF NOT EXISTS digital_funnel.gold.clientesfunnel (
    cliente_id       STRING,
    nombre           STRING,
    edad             INT,
    estado           STRING,
    correo           STRING,
    app              STRING,
    stage            STRING,
    stage_date       DATE,
    proceso_completo INT,        -- the quality check expects 0 or 1
    quality_check    STRING,     -- 'Passed' / 'Not passed'
    ingestion_time   TIMESTAMP   -- filled with current_timestamp on insert
)
LOCATION 'abfss://gold@firstprojectdlucext.dfs.core.windows.net/clientesfunnel/';

In [0]:
-- Gold table for the pivoted funnel: client attributes plus one DATE column
-- per funnel stage (the date recorded for that stage, NULL when absent).
CREATE TABLE IF NOT EXISTS digital_funnel.gold.pv_clientesfunnel (
    cliente_id            STRING,
    proceso_completo      INT,
    nombre                STRING,
    edad                  INT,
    estado                STRING,
    correo                STRING,
    app                   STRING,
    -- One column per pivoted stage value.
    introduccion          DATE,
    solicitud_de_datos    DATE,
    validacion_de_datos   DATE,
    reconocimiento_facial DATE,
    firma                 DATE,
    confirmacion_de_datos DATE,
    ingestion_time        TIMESTAMP,  -- filled with current_timestamp on insert
    updated_date          TIMESTAMP   -- filled with current_timestamp on update
)
LOCATION 'abfss://gold@firstprojectdlucext.dfs.core.windows.net/pv_clientesfunnel/';

#### Data Validation & Enrichment

In [0]:
-- Enriches the silver funnel rows with the client catalog attributes and tags
-- every funnel row with a quality flag.
--   quality_check = 'Passed'     when cliente_id has exactly 5 characters and
--                                proceso_completo is 0 or 1
--   quality_check = 'Not passed' otherwise (this includes NULL values, because
--                                a NULL condition falls through to ELSE)
-- The join is an inner join, so funnel rows whose cliente_id has no match in
-- the catalog are not part of the view.
CREATE OR REPLACE TEMP VIEW clientes_funnel AS
WITH funnel_validated AS (
    SELECT
        cliente_id,
        app,
        stage,
        stage_date,
        proceso_completo,
        CASE
            WHEN LENGTH(cliente_id) = 5
                 AND (proceso_completo = 0 OR proceso_completo = 1)
                THEN 'Passed'
            ELSE 'Not passed'
        END AS quality_check
    FROM digital_funnel.silver.FunnelData
)
SELECT
    fv.cliente_id,
    cd.nombre,
    cd.edad,
    cd.estado,
    cd.correo,
    fv.app,
    fv.stage,
    fv.stage_date,
    fv.proceso_completo,
    fv.quality_check
FROM funnel_validated AS fv
INNER JOIN digital_funnel.silver.catalogdata AS cd
    ON fv.cliente_id = cd.cliente_id;

In [0]:
-- Pivots the gold funnel rows so that each funnel stage becomes its own DATE
-- column holding MAX(stage_date) for that stage.
-- The implicit grouping key is every non-pivoted column of the inner SELECT:
-- cliente_id, proceso_completo, nombre, edad, estado, correo, app.
-- Only the six stage values listed below become columns.
-- The view is intentionally unordered: sorting belongs to the query that
-- displays the data, not to the view consumed by the MERGE.
-- NOTE(review): rows are taken regardless of quality_check; confirm that
-- 'Not passed' rows are meant to be pivoted as well.
CREATE OR REPLACE TEMP VIEW clientesfunnel_pivot AS
SELECT *
FROM (
    SELECT
        cliente_id,
        proceso_completo,
        nombre,
        edad,
        estado,
        correo,
        app,
        stage,
        stage_date
    FROM digital_funnel.gold.clientesfunnel
)
PIVOT (
    MAX(stage_date)
    FOR stage IN (
        'Introduccion'          AS introduccion,
        'Solicitud de datos'    AS solicitud_de_datos,
        'Validacion de datos'   AS validacion_de_datos,
        'Reconocimiento facial' AS reconocimiento_facial,
        'Firma'                 AS firma,
        'Confirmacion de datos' AS confirmacion_de_datos
    )
);

-- Preview of the pivoted view, sorted by client for readability.
SELECT *
FROM clientesfunnel_pivot
ORDER BY cliente_id;

#### Ingesting into Gold

In [0]:
-- Insert-only load of the enriched funnel rows into gold.
-- A source row is inserted only when the target has no row with the same
-- (cliente_id, stage); rows that already exist are left untouched.
-- ingestion_time is stamped with the load time.
-- NOTE(review): the key comparison uses '=', so a source row whose cliente_id
-- or stage is NULL never matches and would be inserted again on every run.
MERGE INTO digital_funnel.gold.clientesfunnel AS tgt
USING clientes_funnel AS src
    ON tgt.cliente_id = src.cliente_id
   AND tgt.stage = src.stage
WHEN NOT MATCHED THEN
    INSERT (
        cliente_id,
        nombre,
        edad,
        estado,
        correo,
        app,
        stage,
        stage_date,
        proceso_completo,
        quality_check,
        ingestion_time
    )
    VALUES (
        src.cliente_id,
        src.nombre,
        src.edad,
        src.estado,
        src.correo,
        src.app,
        src.stage,
        src.stage_date,
        src.proceso_completo,
        src.quality_check,
        current_timestamp()
    );

In [0]:
-- Upserts the pivoted funnel view into gold, keyed on cliente_id.
--   * Client already in the target: refresh the client attributes AND the
--     funnel state (proceso_completo, app, per-stage dates), then stamp
--     updated_date. Refreshing only the client attributes would leave the
--     stage dates of an existing client frozen at their first-load values,
--     even after the client advances to later stages.
--   * New client: insert the full row and stamp ingestion_time
--     (updated_date stays NULL until the first update).
-- The stage dates are overwritten directly: the source view is rebuilt from
-- the complete gold.clientesfunnel table, so it carries every stage loaded so
-- far for the client.
-- NOTE(review): the source view groups by all client attributes, not only by
-- cliente_id. If one client ever yields more than one pivoted row, MERGE will
-- fail because several source rows match the same target row -- confirm the
-- attributes are constant per client.
MERGE INTO digital_funnel.gold.pv_clientesfunnel AS tgt
USING clientesfunnel_pivot AS src
    ON tgt.cliente_id = src.cliente_id
WHEN MATCHED THEN
    UPDATE SET
        tgt.proceso_completo      = src.proceso_completo,
        tgt.nombre                = src.nombre,
        tgt.edad                  = src.edad,
        tgt.estado                = src.estado,
        tgt.correo                = src.correo,
        tgt.app                   = src.app,
        tgt.introduccion          = src.introduccion,
        tgt.solicitud_de_datos    = src.solicitud_de_datos,
        tgt.validacion_de_datos   = src.validacion_de_datos,
        tgt.reconocimiento_facial = src.reconocimiento_facial,
        tgt.firma                 = src.firma,
        tgt.confirmacion_de_datos = src.confirmacion_de_datos,
        tgt.updated_date          = current_timestamp()
WHEN NOT MATCHED THEN
    INSERT (
        cliente_id,
        proceso_completo,
        nombre,
        edad,
        estado,
        correo,
        app,
        introduccion,
        solicitud_de_datos,
        validacion_de_datos,
        reconocimiento_facial,
        firma,
        confirmacion_de_datos,
        ingestion_time
    )
    VALUES (
        src.cliente_id,
        src.proceso_completo,
        src.nombre,
        src.edad,
        src.estado,
        src.correo,
        src.app,
        src.introduccion,
        src.solicitud_de_datos,
        src.validacion_de_datos,
        src.reconocimiento_facial,
        src.firma,
        src.confirmacion_de_datos,
        current_timestamp()
    );

In [0]:
-- Preview of the pivoted gold table after the load (all columns, table order).
SELECT
    cliente_id,
    proceso_completo,
    nombre,
    edad,
    estado,
    correo,
    app,
    introduccion,
    solicitud_de_datos,
    validacion_de_datos,
    reconocimiento_facial,
    firma,
    confirmacion_de_datos,
    ingestion_time,
    updated_date
FROM digital_funnel.gold.pv_clientesfunnel;

#### Calculations

In [0]:
-- Number of clients that finished all stages (proceso_completo = 1).
SELECT COUNT(*)
FROM digital_funnel.gold.pv_clientesfunnel
WHERE proceso_completo = 1;

In [0]:
-- Number of clients that did NOT finish all stages (proceso_completo = 0).
-- Note: 60 clients have only the last stage recorded; these clients are
-- considered as clients that finished all stages.
SELECT COUNT(*)
FROM digital_funnel.gold.pv_clientesfunnel
WHERE proceso_completo = 0;

In [0]:
-- Total time, in days, between the first stage (introduccion) and the last
-- stage (confirmacion_de_datos) for each client that finished the funnel.
-- dias_totales is NULL when either of the two dates is missing.
SELECT
    cliente_id,
    proceso_completo,
    DATEDIFF(confirmacion_de_datos, introduccion) AS dias_totales
FROM digital_funnel.gold.pv_clientesfunnel
WHERE proceso_completo = 1;

In [0]:
-- Stage reach among clients that did not finish the funnel: each column counts
-- the unfinished clients that have a date recorded for that stage
-- (COUNT(col) skips NULLs). Used to look for the most frequent abandonment
-- stage.
SELECT
    COUNT(introduccion)          AS TI,
    COUNT(solicitud_de_datos)    AS TSD,
    COUNT(validacion_de_datos)   AS TVD,
    COUNT(reconocimiento_facial) AS TRF,
    COUNT(firma)                 AS TF,
    COUNT(confirmacion_de_datos) AS TCD
FROM digital_funnel.gold.pv_clientesfunnel
WHERE proceso_completo = 0;

In [0]:
-- Most frequent abandonment stage among clients that did not finish the
-- funnel (proceso_completo = 0).
-- Each client is assigned the LAST stage for which a date is recorded; the
-- CASE is evaluated from the final stage backwards, so a branch is reached
-- only when every later stage is NULL. Earlier stages are not required to be
-- present.
-- A client with no recognised stage date at all gets a NULL last_stage and is
-- not counted in any column; in particular it is not counted under TI, since
-- TI requires introduccion to be present, exactly as every other counter
-- requires its own stage.
WITH unfinished AS (
    SELECT
        CASE
            WHEN confirmacion_de_datos IS NOT NULL THEN 'TCD'
            WHEN firma IS NOT NULL                 THEN 'TF'
            WHEN reconocimiento_facial IS NOT NULL THEN 'TRF'
            WHEN validacion_de_datos IS NOT NULL   THEN 'TVD'
            WHEN solicitud_de_datos IS NOT NULL    THEN 'TSD'
            WHEN introduccion IS NOT NULL          THEN 'TI'
        END AS last_stage
    FROM digital_funnel.gold.pv_clientesfunnel
    WHERE proceso_completo = 0
)
SELECT
    COUNT(CASE WHEN last_stage = 'TI'  THEN 1 END) AS TI,
    COUNT(CASE WHEN last_stage = 'TSD' THEN 1 END) AS TSD,
    COUNT(CASE WHEN last_stage = 'TVD' THEN 1 END) AS TVD,
    COUNT(CASE WHEN last_stage = 'TRF' THEN 1 END) AS TRF,
    COUNT(CASE WHEN last_stage = 'TF'  THEN 1 END) AS TF,
    COUNT(CASE WHEN last_stage = 'TCD' THEN 1 END) AS TCD
FROM unfinished;