# Quality Checks

Script Purpose:

    This script performs various quality checks for data consistency, accuracy, 
    and standardization across the 'silver' layer. It includes checks for:
    - Null or duplicate primary keys.

    - Unwanted spaces in string fields.

    - Data standardization and consistency.

    - Invalid date ranges and orders.

    - Data consistency between related fields.

Usage Notes:

    - Run these checks after loading data into the Silver layer.
    
    - Investigate and resolve any discrepancies found during the checks.

In [0]:
%sql
-- Session context for the quality checks in this notebook.
-- Only one schema can be current at a time, so the session is pointed straight at
-- 'silver'; tables in any other schema (e.g. bronze) must be schema-qualified.
USE CATALOG dev_project;
USE SCHEMA silver;

## Checking 'silver.crm_cust_info'

In [0]:
%sql
-- Primary-key integrity: customer_id must be unique and non-NULL.
-- Each returned row is an offending key together with its row count.
-- Expectation: No Results
SELECT
    customer_id,
    COUNT(*)
FROM silver.crm_cust_info
GROUP BY customer_id
HAVING customer_id IS NULL
    OR COUNT(*) > 1;

-- Whitespace hygiene: customer_number must be identical to its trimmed form.
-- Expectation: No Results
SELECT customer_number
FROM silver.crm_cust_info
WHERE TRIM(customer_number) <> customer_number;

-- Standardization review: list every distinct marital_status value
-- so unexpected spellings can be spotted by eye.
SELECT marital_status
FROM silver.crm_cust_info
GROUP BY marital_status;

## Checking 'silver.crm_prd_info'

In [0]:
%sql
-- Primary-key integrity: product_id must be unique and non-NULL.
-- Each returned row is an offending key together with its row count.
-- Expectation: No Results
SELECT
    product_id,
    COUNT(*)
FROM silver.crm_prd_info
GROUP BY product_id
HAVING product_id IS NULL
    OR COUNT(*) > 1;

-- Whitespace hygiene: product_number must be identical to its trimmed form.
-- Expectation: No Results
SELECT product_number
FROM silver.crm_prd_info
WHERE TRIM(product_number) <> product_number;

-- Cost sanity: product_cost must be present and non-negative.
-- Expectation: No Results
SELECT product_cost
FROM silver.crm_prd_info
WHERE product_cost IS NULL
    OR product_cost < 0;

-- Standardization review: list every distinct product_line value
-- so unexpected spellings can be spotted by eye.
SELECT product_line
FROM silver.crm_prd_info
GROUP BY product_line;

-- Date ordering: a row is invalid when its start_date falls after its end_date.
-- Rows where either date is NULL are not returned (the comparison is UNKNOWN).
-- Expectation: No Results
SELECT *
FROM silver.crm_prd_info
WHERE start_date > end_date;

## Checking 'silver.crm_sales_details'

In [0]:
%sql
-- Raw due-date profiling.
-- NOTE: unlike the other checks in this section, this query reads the *bronze*
-- table, where sls_due_dt is compared as an 8-digit number (presumably yyyymmdd --
-- confirm against the bronze schema). A value is reported when it falls outside
-- 19000101..20500101 or is not exactly 8 characters long once rendered as text.
-- Zeros are shown as NULL in the output. NULL inputs are not reported, because
-- every predicate evaluates to UNKNOWN for them.
-- Expectation: No Invalid Dates
SELECT
    NULLIF(sls_due_dt, 0) AS sls_due_dt
FROM bronze.crm_sales_details
WHERE sls_due_dt < 19000101        -- lower bound; also catches zero and negative values
    OR sls_due_dt > 20500101
    OR LENGTH(CAST(sls_due_dt AS STRING)) != 8;   -- explicit cast instead of implicit coercion

-- Check for Invalid Date Orders (Order Date > Shipping/Due Dates)
-- An order must not be dated after its ship date or its due date.
-- Expectation: No Results
SELECT
    *
FROM silver.crm_sales_details
WHERE order_date > ship_date
    OR order_date > due_date;

-- Check Data Consistency: Sales = Quantity * Price
-- All three values must be present and strictly positive, and sales_amount must
-- equal quantity * price. The IS NULL tests are required because the inequality
-- alone evaluates to UNKNOWN (and is filtered out) when any operand is NULL.
-- Expectation: No Results
SELECT DISTINCT
    sales_amount,
    quantity,
    price
FROM silver.crm_sales_details
WHERE sales_amount != quantity * price
    OR sales_amount IS NULL
    OR quantity IS NULL
    OR price IS NULL
    OR sales_amount <= 0
    OR quantity <= 0
    OR price <= 0
ORDER BY sales_amount, quantity, price;

## Checking 'silver.erp_cust_az12'

In [0]:
%sql
-- Identify Out-of-Range Dates
-- Returns each distinct birth_date earlier than 1924-01-01 or later than today.
-- The upper bound uses CURRENT_DATE() so the comparison is date-to-date rather
-- than date-to-timestamp.
-- NOTE(review): assumes birth_date is a DATE column -- confirm against the table DDL.
-- Expectation: No Results (all birthdates between 1924-01-01 and today)
SELECT DISTINCT
    birth_date
FROM silver.erp_cust_az12
WHERE birth_date < DATE '1924-01-01'
    OR birth_date > CURRENT_DATE();

-- Data Standardization & Consistency
-- Lists every distinct gender value so unexpected spellings can be spotted by eye.
SELECT DISTINCT
    gender
FROM silver.erp_cust_az12;

## Checking 'silver.erp_loc_a101'

In [0]:
%sql
-- Standardization review: list every distinct country value, sorted,
-- so near-duplicate spellings end up next to each other.
SELECT country
FROM silver.erp_loc_a101
GROUP BY country
ORDER BY country;

## Checking 'silver.erp_px_cat_g1v2'

In [0]:
%sql
-- Whitespace hygiene: category, subcategory and maintenance_flag must each be
-- identical to their trimmed form; any row failing on at least one is returned.
-- Expectation: No Results
SELECT *
FROM silver.erp_px_cat_g1v2
WHERE TRIM(category) <> category
    OR TRIM(subcategory) <> subcategory
    OR TRIM(maintenance_flag) <> maintenance_flag;

-- Standardization review: list every distinct maintenance_flag value
-- so unexpected spellings can be spotted by eye.
SELECT maintenance_flag
FROM silver.erp_px_cat_g1v2
GROUP BY maintenance_flag;