In [None]:
%load_ext sql

In [None]:
# NOTE(review): the database password is hard-coded in this connection URL.
# Consider keeping it out of the notebook, e.g. via the DATABASE_URL environment
# variable that ipython-sql falls back to when no connection string is supplied.
%sql mysql+pymysql://root:donholy@localhost:3306/md_water_services

In [None]:
%config SqlMagic.displaylimit = None

In [None]:
%%sql

-- Combined analysis view joining location, visits, water_source and well_pollution,
-- restricted to rows where visit_count = 1.
-- well_pollution is LEFT JOINed, so results is NULL when a source has no pollution record.
-- CREATE OR REPLACE lets this cell be re-run without failing on an already existing view.
CREATE OR REPLACE VIEW combined_analysis_table AS
SELECT
    loc.province_name AS province_name,
    loc.town_name,
    wats.type_of_water_source,
    loc.location_type,
    wats.number_of_people_served,
    vis.time_in_queue,
    well.results
FROM
    location AS loc
INNER JOIN
    visits AS vis
    ON loc.location_id = vis.location_id
INNER JOIN
    water_source AS wats
    ON wats.source_id = vis.source_id
LEFT JOIN
    well_pollution AS well
    ON vis.source_id = well.source_id
WHERE
    vis.visit_count = 1;


In [None]:
%%sql

-- Share of people served by each water source type within every province,
-- expressed as a rounded percentage of the province total.
WITH province_population AS (
    -- Total number of people served per province, used as the denominator below.
    SELECT
        province_name,
        SUM(number_of_people_served) AS total_ppl_serv
    FROM combined_analysis_table
    GROUP BY province_name
)
SELECT
    cat.province_name,
    ROUND(
        SUM(CASE cat.type_of_water_source WHEN 'river' THEN cat.number_of_people_served ELSE 0 END)
        * 100.0 / pp.total_ppl_serv,
        0
    ) AS river,
    ROUND(
        SUM(CASE cat.type_of_water_source WHEN 'shared_tap' THEN cat.number_of_people_served ELSE 0 END)
        * 100.0 / pp.total_ppl_serv,
        0
    ) AS shared_tap,
    ROUND(
        SUM(CASE cat.type_of_water_source WHEN 'tap_in_home' THEN cat.number_of_people_served ELSE 0 END)
        * 100.0 / pp.total_ppl_serv,
        0
    ) AS tap_in_home,
    ROUND(
        SUM(CASE cat.type_of_water_source WHEN 'tap_in_home_broken' THEN cat.number_of_people_served ELSE 0 END)
        * 100.0 / pp.total_ppl_serv,
        0
    ) AS tap_in_home_broken,
    ROUND(
        SUM(CASE cat.type_of_water_source WHEN 'well' THEN cat.number_of_people_served ELSE 0 END)
        * 100.0 / pp.total_ppl_serv,
        0
    ) AS well
FROM combined_analysis_table AS cat
INNER JOIN province_population AS pp
    ON cat.province_name = pp.province_name
GROUP BY cat.province_name
ORDER BY cat.province_name;

In [None]:
%%sql

-- Share of people served by each water source type within every town,
-- expressed as a rounded percentage of the town total.
-- Towns are matched on province and town name together, as both are used in the join.
WITH town_population AS (
    -- Total number of people served per (province, town) pair, used as the denominator below.
    SELECT
        province_name,
        town_name,
        SUM(number_of_people_served) AS total_ppl_serv
    FROM combined_analysis_table
    GROUP BY
        province_name,
        town_name
)
SELECT
    cat.province_name,
    cat.town_name,
    ROUND(
        SUM(CASE cat.type_of_water_source WHEN 'river' THEN cat.number_of_people_served ELSE 0 END)
        * 100.0 / tp.total_ppl_serv,
        0
    ) AS river,
    ROUND(
        SUM(CASE cat.type_of_water_source WHEN 'shared_tap' THEN cat.number_of_people_served ELSE 0 END)
        * 100.0 / tp.total_ppl_serv,
        0
    ) AS shared_tap,
    ROUND(
        SUM(CASE cat.type_of_water_source WHEN 'tap_in_home' THEN cat.number_of_people_served ELSE 0 END)
        * 100.0 / tp.total_ppl_serv,
        0
    ) AS tap_in_home,
    ROUND(
        SUM(CASE cat.type_of_water_source WHEN 'tap_in_home_broken' THEN cat.number_of_people_served ELSE 0 END)
        * 100.0 / tp.total_ppl_serv,
        0
    ) AS tap_in_home_broken,
    ROUND(
        SUM(CASE cat.type_of_water_source WHEN 'well' THEN cat.number_of_people_served ELSE 0 END)
        * 100.0 / tp.total_ppl_serv,
        0
    ) AS well
FROM combined_analysis_table AS cat
INNER JOIN town_population AS tp
    ON cat.province_name = tp.province_name
    AND cat.town_name = tp.town_name
GROUP BY
    cat.province_name,
    cat.town_name
ORDER BY
    shared_tap DESC;

In [None]:
%%sql

-- Percentage of in-home taps that are broken, per town.
-- NOTE(review): town_aggregated_water_access is not created anywhere in the visible notebook.
-- Presumably it stores the output of the town-level aggregation query - confirm it exists before running.
SELECT
    province_name,
    town_name,
    -- NULLIF turns a zero denominator (no in-home taps at all) into an explicit NULL result
    -- instead of relying on an implicit division-by-zero warning.
    ROUND(tap_in_home_broken / NULLIF(tap_in_home_broken + tap_in_home, 0) * 100, 0) AS Pct_broken_taps
FROM
    town_aggregated_water_access;

In [None]:
%%sql

-- Preview of the improvement recommended for each water source that needs work.
-- Rows are limited to visit_count = 1 and to sources that are either
--   wells whose pollution result is not clean,
--   rivers or broken in-home taps, or
--   shared taps with a queue time of at least 30.
-- NOTE(review): the comparison with 'clean' presumably relies on a case-insensitive
-- collation if stored values are capitalised like the Contaminated ones - confirm.
SELECT
    location.address,
    location.town_name,
    location.province_name,
    water_source.source_id,
    water_source.type_of_water_source,
    well_pollution.results,
    CASE
        WHEN water_source.type_of_water_source = 'well'
            AND well_pollution.results = 'Contaminated: Chemical'
            THEN 'Install RO filter'
        WHEN water_source.type_of_water_source = 'well'
            AND well_pollution.results = 'Contaminated: Biological'
            THEN 'Install UV and RO filter'
        WHEN water_source.type_of_water_source = 'river'
            THEN 'Drill well'
        WHEN water_source.type_of_water_source = 'shared_tap'
            AND visits.time_in_queue >= 30
            -- One extra tap per full 30 units of queue time. The spaces inside the literals
            -- keep the number from being glued to the surrounding words.
            THEN CONCAT('Install ', FLOOR(visits.time_in_queue / 30), ' taps nearby')
        WHEN water_source.type_of_water_source = 'tap_in_home_broken'
            THEN 'Diagnose infrastructure'
        ELSE NULL
    END AS improvements
FROM
    water_source
LEFT JOIN
    well_pollution
        ON water_source.source_id = well_pollution.source_id
INNER JOIN
    visits
        ON water_source.source_id = visits.source_id
INNER JOIN
    location
        ON location.location_id = visits.location_id
WHERE
    visits.visit_count = 1
    AND (
        well_pollution.results != 'clean'
        OR water_source.type_of_water_source IN ('tap_in_home_broken', 'river')
        -- Parenthesised to make the AND-before-OR grouping explicit.
        OR (water_source.type_of_water_source = 'shared_tap' AND visits.time_in_queue >= 30)
    );

In [None]:
%%sql

-- Populate project_progress with one recommended improvement per water source that needs work.
-- Rows are limited to visit_count = 1 and to sources that are either
--   wells whose pollution result is not clean,
--   rivers or broken in-home taps, or
--   shared taps with a queue time of at least 30.
-- NOTE(review): re-running this cell presumably inserts the same rows again unless
-- project_progress has a uniqueness constraint on source_id - confirm before re-running.
INSERT INTO
    project_progress (
        source_id,
        Address,
        Town,
        Province,
        Source_type,
        Improvement
    )
SELECT
    water_source.source_id,
    location.address,
    location.town_name,
    location.province_name,
    water_source.type_of_water_source,
    CASE
        WHEN water_source.type_of_water_source = 'well'
            AND well_pollution.results = 'Contaminated: Chemical'
            THEN 'Install RO filter'
        WHEN water_source.type_of_water_source = 'well'
            AND well_pollution.results = 'Contaminated: Biological'
            THEN 'Install UV and RO filter'
        WHEN water_source.type_of_water_source = 'river'
            THEN 'Drill well'
        WHEN water_source.type_of_water_source = 'shared_tap'
            AND visits.time_in_queue >= 30
            -- One extra tap per full 30 units of queue time. The spaces inside the literals
            -- keep the stored text readable, e.g. Install 2 taps nearby.
            THEN CONCAT('Install ', FLOOR(visits.time_in_queue / 30), ' taps nearby')
        WHEN water_source.type_of_water_source = 'tap_in_home_broken'
            THEN 'Diagnose infrastructure'
        ELSE NULL
    END AS improvements
FROM
    water_source
LEFT JOIN
    well_pollution
        ON water_source.source_id = well_pollution.source_id
INNER JOIN
    visits
        ON water_source.source_id = visits.source_id
INNER JOIN
    location
        ON location.location_id = visits.location_id
WHERE
    visits.visit_count = 1
    AND (
        well_pollution.results != 'clean'
        OR water_source.type_of_water_source IN ('tap_in_home_broken', 'river')
        -- Parenthesised to make the AND-before-OR grouping explicit.
        OR (water_source.type_of_water_source = 'shared_tap' AND visits.time_in_queue >= 30)
    );