In [None]:
# Load the `sql` IPython extension, which provides the %sql and %%sql magics used in the cells below.
%load_ext sql

In [None]:
%sql mysql+pymysql://root:donholy@localhost:3306/md_water_services

In [None]:
# Set the SqlMagic displaylimit option to None so rendered result tables are not cut off at a row limit.
%config SqlMagic.displaylimit = None

In [None]:
%%sql

-- Tie every audited location to its visit records and to the quality score
-- that the survey stored for each of those visits.

SELECT
    ar.location_id AS audit_location,
    ar.true_water_source_score,
    v.record_id,
    wq.subjective_quality_score
FROM auditor_report AS ar
INNER JOIN visits AS v
    ON v.location_id = ar.location_id
INNER JOIN water_quality AS wq
    ON wq.record_id = v.record_id;

In [None]:
%%sql

-- First-visit records where the surveyed quality score agrees with the
-- auditor score, i.e. the computed diff is zero.

SELECT
    ar.location_id AS audit_location,
    ar.true_water_source_score,
    v.record_id,
    wq.subjective_quality_score,
    wq.subjective_quality_score - ar.true_water_source_score AS diff
FROM auditor_report AS ar
INNER JOIN visits AS v
    ON v.location_id = ar.location_id
INNER JOIN water_quality AS wq
    ON wq.record_id = v.record_id
WHERE
    v.visit_count = 1
    AND wq.subjective_quality_score - ar.true_water_source_score = 0;

In [None]:
%%sql
-- Same join as the matching-score query with the comparison flipped, so only
-- first-visit records where the surveyed score differs from the auditor score remain.

SELECT
    ar.location_id AS audit_location,
    v.record_id,
    ar.true_water_source_score AS auditor_score,
    wq.subjective_quality_score AS employee_score,
    wq.subjective_quality_score - ar.true_water_source_score AS diff
FROM auditor_report AS ar
INNER JOIN visits AS v
    ON v.location_id = ar.location_id
INNER JOIN water_quality AS wq
    ON wq.record_id = v.record_id
WHERE
    v.visit_count = 1
    AND wq.subjective_quality_score - ar.true_water_source_score <> 0;

In [None]:
%%sql

-- First-visit score mismatches, extended with the water source type recorded
-- by the auditor and by the survey so the two can be compared side by side.
-- The location column carries exactly one alias (audit_location, the name the
-- other queries in this notebook use). Two chained AS clauses on a single
-- expression are rejected by the SQL parser.

SELECT
    audit.location_id AS audit_location,
    audit.type_of_water_source AS auditors_source,
    wats.type_of_water_source AS survey_source,
    vis.record_id,
    audit.true_water_source_score AS auditor_score,
    watq.subjective_quality_score AS employee_score,
    watq.subjective_quality_score - audit.true_water_source_score AS diff
FROM
    auditor_report audit
JOIN
    visits vis
    ON audit.location_id = vis.location_id
JOIN
    water_quality watq
    ON vis.record_id = watq.record_id
JOIN
    water_source wats
    ON vis.source_id = wats.source_id
WHERE
    watq.subjective_quality_score - audit.true_water_source_score != 0
    AND vis.visit_count = 1;

In [None]:
%%sql

-- First-visit score mismatches together with the id of the employee
-- assigned to the visit.

SELECT
    ar.location_id AS audit_location,
    v.record_id,
    ar.true_water_source_score AS auditor_score,
    wq.subjective_quality_score AS employee_score,
    v.assigned_employee_id,
    wq.subjective_quality_score - ar.true_water_source_score AS diff
FROM auditor_report AS ar
INNER JOIN visits AS v
    ON v.location_id = ar.location_id
INNER JOIN water_quality AS wq
    ON wq.record_id = v.record_id
WHERE
    v.visit_count = 1
    AND wq.subjective_quality_score - ar.true_water_source_score <> 0;

In [None]:
%%sql

-- First-visit score mismatches with the employee table joined in, so each
-- record shows the name as well as the id of the assigned employee.

SELECT
    ar.location_id AS audit_location,
    v.record_id,
    e.employee_name,
    ar.true_water_source_score AS auditor_score,
    wq.subjective_quality_score AS employee_score,
    v.assigned_employee_id,
    wq.subjective_quality_score - ar.true_water_source_score AS diff
FROM auditor_report AS ar
INNER JOIN visits AS v
    ON v.location_id = ar.location_id
INNER JOIN water_quality AS wq
    ON wq.record_id = v.record_id
INNER JOIN employee AS e
    ON e.assigned_employee_id = v.assigned_employee_id
WHERE
    v.visit_count = 1
    AND wq.subjective_quality_score - ar.true_water_source_score <> 0;

In [None]:
%%sql

-- Wrap the mismatch query in a CTE so that later steps can build on it.
-- The CTE is declared and referenced with the same lower-case spelling, because
-- MySQL may compare table-like names case-sensitively depending on the server
-- platform and its lower_case_table_names setting.

WITH incorrect_records AS (
    SELECT
        audit.location_id AS audit_location,
        vis.record_id,
        emp.employee_name,
        audit.true_water_source_score AS auditor_score,
        watq.subjective_quality_score AS employee_score,
        vis.assigned_employee_id AS assigned_employee_id,
        watq.subjective_quality_score - audit.true_water_source_score AS diff
    FROM
        auditor_report audit
    JOIN
        visits vis
        ON audit.location_id = vis.location_id
    JOIN
        water_quality watq
        ON vis.record_id = watq.record_id
    JOIN
        employee emp
        ON vis.assigned_employee_id = emp.assigned_employee_id
    WHERE
        watq.subjective_quality_score - audit.true_water_source_score != 0
        AND vis.visit_count = 1
)

SELECT
    *
FROM
    incorrect_records;


In [None]:
%%sql
-- Distinct employees who logged at least one first-visit score that differs
-- from the auditor score.
-- The CTE name uses one lower-case spelling in both its declaration and its
-- reference, because MySQL may compare table-like names case-sensitively
-- depending on the platform and the lower_case_table_names setting.

WITH incorrect_records AS (
    SELECT
        audit.location_id AS audit_location,
        vis.record_id,
        emp.employee_name AS employee_name,
        audit.true_water_source_score AS auditor_score,
        watq.subjective_quality_score AS employee_score,
        vis.assigned_employee_id AS assigned_employee_id,
        watq.subjective_quality_score - audit.true_water_source_score AS diff
    FROM
        auditor_report audit
    JOIN
        visits vis
        ON audit.location_id = vis.location_id
    JOIN
        water_quality watq
        ON vis.record_id = watq.record_id
    JOIN
        employee emp
        ON vis.assigned_employee_id = emp.assigned_employee_id
    WHERE
        watq.subjective_quality_score - audit.true_water_source_score != 0
        AND vis.visit_count = 1
)

SELECT
    DISTINCT employee_name
FROM
    incorrect_records;

In [None]:
%%sql

-- Number of mismatched first-visit records per employee, highest count first.
-- The CTE name uses one lower-case spelling in both its declaration and its
-- reference, because MySQL may compare table-like names case-sensitively
-- depending on the platform and the lower_case_table_names setting.

WITH incorrect_records AS (
    SELECT
        audit.location_id AS audit_location,
        vis.record_id,
        emp.employee_name AS employee_name,
        audit.true_water_source_score AS auditor_score,
        watq.subjective_quality_score AS employee_score,
        vis.assigned_employee_id AS assigned_employee_id,
        watq.subjective_quality_score - audit.true_water_source_score AS diff
    FROM
        auditor_report audit
    JOIN
        visits vis
        ON audit.location_id = vis.location_id
    JOIN
        water_quality watq
        ON vis.record_id = watq.record_id
    JOIN
        employee emp
        ON vis.assigned_employee_id = emp.assigned_employee_id
    WHERE
        watq.subjective_quality_score - audit.true_water_source_score != 0
        AND vis.visit_count = 1
)

SELECT
    employee_name,
    COUNT(employee_name) AS number_of_mistakes
FROM
    incorrect_records
GROUP BY employee_name
ORDER BY number_of_mistakes DESC;

In [None]:

%%sql

-- Average number of mismatched first-visit records per employee.
-- The two CTEs are declared side by side instead of nesting one WITH inside
-- another. error_count carries no ORDER BY, since row order has no effect on AVG.
-- CTE names use one lower-case spelling throughout, because MySQL may compare
-- table-like names case-sensitively depending on the platform and the
-- lower_case_table_names setting.

WITH
    incorrect_records AS (
        SELECT
            audit.location_id AS audit_location,
            vis.record_id,
            emp.employee_name AS employee_name,
            audit.true_water_source_score AS auditor_score,
            watq.subjective_quality_score AS employee_score,
            vis.assigned_employee_id AS assigned_employee_id,
            watq.subjective_quality_score - audit.true_water_source_score AS diff
        FROM
            auditor_report audit
        JOIN
            visits vis
            ON audit.location_id = vis.location_id
        JOIN
            water_quality watq
            ON vis.record_id = watq.record_id
        JOIN
            employee emp
            ON vis.assigned_employee_id = emp.assigned_employee_id
        WHERE
            watq.subjective_quality_score - audit.true_water_source_score != 0
            AND vis.visit_count = 1
    ),
    error_count AS (
        SELECT
            employee_name,
            COUNT(employee_name) AS number_of_mistakes
        FROM
            incorrect_records
        GROUP BY employee_name
    )

SELECT
    AVG(number_of_mistakes) AS avg_error_count_per_empl
FROM
    error_count;

In [None]:
%%sql

-- Employees whose number of mismatched first-visit records is above the
-- per-employee average, highest count first.
-- The two CTEs are declared side by side instead of nesting one WITH inside
-- another. The ORDER BY sits on the outer query, the only place where it is
-- guaranteed to shape the displayed result.
-- CTE names use one lower-case spelling throughout, because MySQL may compare
-- table-like names case-sensitively depending on the platform and the
-- lower_case_table_names setting.

WITH
    incorrect_records AS (
        SELECT
            audit.location_id AS audit_location,
            vis.record_id,
            emp.employee_name AS employee_name,
            audit.true_water_source_score AS auditor_score,
            watq.subjective_quality_score AS employee_score,
            vis.assigned_employee_id AS assigned_employee_id,
            watq.subjective_quality_score - audit.true_water_source_score AS diff
        FROM
            auditor_report audit
        JOIN
            visits vis
            ON audit.location_id = vis.location_id
        JOIN
            water_quality watq
            ON vis.record_id = watq.record_id
        JOIN
            employee emp
            ON vis.assigned_employee_id = emp.assigned_employee_id
        WHERE
            watq.subjective_quality_score - audit.true_water_source_score != 0
            AND vis.visit_count = 1
    ),
    error_count AS (
        SELECT
            employee_name,
            COUNT(employee_name) AS number_of_mistakes
        FROM
            incorrect_records
        GROUP BY employee_name
    )

SELECT
    employee_name,
    number_of_mistakes
FROM
    error_count
WHERE
    number_of_mistakes > (SELECT
                            AVG(number_of_mistakes) AS avg_error_count_per_empl
                        FROM
                            error_count)
ORDER BY number_of_mistakes DESC;

In [None]:
%%sql

-- Persist the mismatch query as a view so later cells can reuse it without
-- repeating the joins.
-- The view exposes audit.statements, the column that the later cells select
-- from this view.
-- CREATE OR REPLACE keeps the cell re-runnable after the view already exists.
-- The view name is written in lower case, matching the spelling the later cells
-- use, because MySQL may treat view names as case-sensitive depending on the
-- platform and the lower_case_table_names setting.

CREATE OR REPLACE VIEW incorrect_records1 AS
SELECT
    audit.location_id AS audit_location,
    vis.record_id,
    emp.employee_name AS employee_name,
    audit.true_water_source_score AS auditor_score,
    watq.subjective_quality_score AS employee_score,
    vis.assigned_employee_id AS assigned_employee_id,
    audit.statements
FROM
    auditor_report audit
JOIN
    visits vis
    ON audit.location_id = vis.location_id
JOIN
    water_quality watq
    ON vis.record_id = watq.record_id
JOIN
    employee emp
    ON vis.assigned_employee_id = emp.assigned_employee_id
WHERE
    watq.subjective_quality_score - audit.true_water_source_score != 0
    AND vis.visit_count = 1;

In [None]:
%%sql
-- Average number of mismatched records per employee, computed from the
-- incorrect_records1 view.
-- The view is used because a CTE named incorrect_records only exists inside
-- the statement that declares it and cannot be queried from a separate cell.

WITH
    error_count AS (
SELECT
    employee_name,
    COUNT(employee_name) AS number_of_mistakes
FROM
    incorrect_records1
GROUP BY
    employee_name)

SELECT
    AVG(number_of_mistakes) AS avg_error_count_per_empl
FROM
    error_count;


In [None]:
%%sql

-- Employees whose number of mismatched records is above the per-employee
-- average, computed from the incorrect_records1 view.
-- The view is used because a CTE named incorrect_records only exists inside
-- the statement that declares it and cannot be queried from a separate cell.

WITH
    error_count AS (
SELECT
    employee_name,
    COUNT(employee_name) AS number_of_mistakes
FROM
    incorrect_records1
GROUP BY
    employee_name)

SELECT
    employee_name,
    number_of_mistakes
FROM
    error_count
WHERE
    number_of_mistakes > (SELECT
                            AVG(number_of_mistakes) AS avg_error_count_per_empl
                        FROM
                            error_count);

In [None]:
%%sql

-- Location and statements for every row of the incorrect_records1 view that
-- belongs to an employee whose mistake count is above the per-employee average.

WITH
    error_count AS (
        SELECT
            employee_name,
            COUNT(employee_name) AS number_of_mistakes
        FROM incorrect_records1
        GROUP BY employee_name
    ),
    suspect_list AS (
        SELECT
            employee_name,
            number_of_mistakes
        FROM error_count
        WHERE number_of_mistakes > (
            SELECT AVG(number_of_mistakes) AS avg_error_count_per_empl
            FROM error_count
        )
    )
SELECT
    employee_name,
    audit_location,
    statements
FROM incorrect_records1
WHERE employee_name IN (SELECT employee_name FROM suspect_list);

In [None]:
%%sql

-- Rows of the incorrect_records1 view for employees with an above-average
-- mistake count, limited to statements that contain the word cash.

WITH
    error_count AS (
        SELECT
            employee_name,
            COUNT(employee_name) AS number_of_mistakes
        FROM incorrect_records1
        GROUP BY employee_name
    ),
    suspect_list AS (
        SELECT
            employee_name,
            number_of_mistakes
        FROM error_count
        WHERE number_of_mistakes > (
            SELECT AVG(number_of_mistakes) AS avg_error_count_per_empl
            FROM error_count
        )
    )
SELECT
    employee_name,
    audit_location,
    statements
FROM incorrect_records1
WHERE
    statements LIKE '%cash%'
    AND employee_name IN (SELECT employee_name FROM suspect_list);

In [None]:
%%sql

-- Counter-check of the previous query. Rows of the incorrect_records1 view whose
-- statements contain the word cash but whose employee is NOT among those with an
-- above-average mistake count.

WITH
    error_count AS (
        SELECT
            employee_name,
            COUNT(employee_name) AS number_of_mistakes
        FROM incorrect_records1
        GROUP BY employee_name
    ),
    suspect_list AS (
        SELECT
            employee_name,
            number_of_mistakes
        FROM error_count
        WHERE number_of_mistakes > (
            SELECT AVG(number_of_mistakes) AS avg_error_count_per_empl
            FROM error_count
        )
    )
SELECT
    employee_name,
    audit_location,
    statements
FROM incorrect_records1
WHERE
    statements LIKE '%cash%'
    AND employee_name NOT IN (SELECT employee_name FROM suspect_list);