## 03_quality checks
### Purpose: ensure data sanity, integrity and accuracy after silver transformations

### Rows count

In [0]:
SELECT 'clientes_raw' AS tabla, COUNT(*) AS filas 
FROM curso.ventas.clientes_raw
UNION ALL
SELECT 'clientes_clean' AS tabla, COUNT(*) AS filas 
FROM curso.ventas.clientes_clean
UNION ALL
SELECT 'productos_raw' AS tabla, COUNT(*) AS filas 
FROM curso.ventas.productos_raw
UNION ALL
SELECT 'productos_clean' AS tabla, COUNT(*) AS filas 
FROM curso.ventas.productos_clean
UNION ALL
SELECT 'empleados_raw' AS tabla, COUNT(*) AS filas 
FROM curso.ventas.empleados_raw
UNION ALL
SELECT 'empleados_clean' AS tabla, COUNT(*) AS filas 
FROM curso.ventas.empleados_clean
UNION ALL
SELECT 'locales_raw' AS tabla, COUNT(*) AS filas 
FROM curso.ventas.locales_raw
UNION ALL
SELECT 'locales_clean' AS tabla, COUNT(*) AS filas 
FROM curso.ventas.locales_clean
UNION ALL
SELECT 'facturas_raw' AS tabla, COUNT(*) AS filas 
FROM curso.ventas.facturas_raw
UNION ALL
SELECT 'facturas_clean' AS tabla, COUNT(*) AS filas 
FROM curso.ventas.facturas_clean
ORDER BY tabla ASC;

tabla,filas
clientes_clean,2500
clientes_raw,2500
empleados_clean,56
empleados_raw,56
facturas_clean,100000
facturas_raw,100000
locales_clean,14
locales_raw,14
productos_clean,119
productos_raw,119


### Referencial integrity

In [0]:
-- Clients table

SELECT COUNT(*) AS facturas_fk_cliente_invalida
FROM curso.ventas.facturas_clean f
LEFT JOIN curso.ventas.clientes_clean c
  ON f.id_cliente = c.id_cliente
WHERE f.id_cliente IS NOT NULL
  AND c.id_cliente IS NULL;


facturas_fk_cliente_invalida
0


In [0]:
-- Products table
SELECT COUNT(*) AS facturas_fk_producto_invalida
FROM curso.ventas.facturas_clean f
LEFT JOIN curso.ventas.productos_clean p
  ON f.id_producto = p.id_producto
WHERE f.id_producto IS NOT NULL
  AND p.id_producto IS NULL;

facturas_fk_producto_invalida
0


In [0]:
-- Employees table
SELECT COUNT(*) AS facturas_fk_vendedor_invalida
FROM curso.ventas.facturas_clean f
LEFT JOIN curso.ventas.empleados_clean e
  ON f.id_vendedor = e.id_vendedor
WHERE f.id_vendedor IS NOT NULL
  AND e.id_vendedor IS NULL;

facturas_fk_vendedor_invalida
13922


In [0]:
SELECT COUNT(*) AS empleados_fk_sucursal_invalida
FROM curso.ventas.empleados_clean e
LEFT JOIN curso.ventas.locales_clean l
  ON e.sucursal = l.id_sucursal
WHERE e.sucursal IS NOT NULL
  AND l.id_sucursal IS NULL;

empleados_fk_sucursal_invalida
0


### Fact table sanity

In [0]:
SELECT COUNT(*) AS facturas_cantidad_invalida
FROM curso.ventas.facturas_clean
WHERE cantidad IS NULL OR cantidad <= 0;

facturas_cantidad_invalida
0


In [0]:
SELECT COUNT(*) AS facturas_fecha_futura
FROM curso.ventas.facturas_clean
WHERE fecha_venta > current_date();

facturas_fecha_futura
0


In [0]:
SELECT COUNT(*) AS facturas_duplicadas
FROM curso.ventas.facturas_clean
GROUP BY num_factura
HAVING facturas_duplicadas > 1;

facturas_duplicadas


As there are several sales records with employees not in the employees data, we take a look at some example records

In [0]:
SELECT *
FROM curso.ventas.facturas_clean f
LEFT JOIN curso.ventas.empleados_clean e
  ON f.id_vendedor = e.id_vendedor
WHERE f.id_vendedor IS NOT NULL
  AND e.id_vendedor IS NULL
LIMIT 20;

num_factura,fecha_venta,id_producto,id_vendedor,id_cliente,cantidad,id_vendedor.1,sucursal,nombre,apellido
0001-14069670,2023-04-12,87,60,1285,36,,,,
0001-14960566,2022-03-13,104,63,1432,149,,,,
0001-82904878,2021-08-29,46,63,293,133,,,,
0011-55412993,2022-10-16,58,60,2146,84,,,,
0011-62426160,2023-02-27,63,57,1054,119,,,,
0011-66989817,2023-05-07,94,62,1213,56,,,,
0013-36860191,2023-08-31,70,63,743,150,,,,
0015-27838280,2021-05-17,88,58,411,156,,,,
0019-81120933,2022-12-15,43,65,261,101,,,,
0020-00123570,2023-02-27,108,62,1518,160,,,,


### Results:
### -Invalid FK to employees: 13.922 records.
###   -Intepretation: historical employees that were erased, instead of flagged.
###   -Action: flag the records in the Gold layer.
