# Find Locations Around Wildfires to Monitor
This layer accesses the recent FIRMS data and compiles a list of the sensor locations that will be queried through the OpenAQ API to monitor the current air quality. I am only accepting the high-confidence FIRMS data points to reduce the number of API calls. 

This first cell collects only the location_ids matched to fires detected between 5 days ago and today, in order to monitor the air quality for 5 days after a fire was present. It currently filters for high-confidence wildfires; once the code is more robust, I'll loosen that restriction to include normal confidence as well, but that will quintuple the number of results, which means more careful handling at the API level.  

In [0]:
%sql
-- Build a temporary view that pairs sensor locations with the acquisition date
-- of every recent FIRMS detection whose bounding box contains the sensor's
-- tract coordinates.
-- One row is emitted per (detection, sensor row) match, so the same
-- (nn_location_id, acq_date) pair can repeat; the statements that read this
-- view de-duplicate with DISTINCT / GROUP BY.
CREATE OR REPLACE TEMPORARY VIEW matched_locations_view AS
WITH recent_fire_boxes AS (
    -- Bounding box around each detection acquired from 5 days ago through today
    -- (BETWEEN is inclusive on both ends).
    -- 0.36 degrees is ~40 / 111; presumably a ~40 km half-width at ~111 km per
    -- degree of latitude -- TODO confirm the intended radius.
    -- The longitude half-width 40 / (111 * COS(latitude)) grows with |latitude|,
    -- presumably to compensate for meridians converging away from the equator.
    -- NOTE(review): the notebook intro says only high-confidence detections are
    -- used, but this filter admits both 'h' and 'n'; confirm which is intended.
    SELECT
        acq_date,
        ROUND(latitude + 0.36, 5) AS max_lat,
        ROUND(latitude - 0.36, 5) AS min_lat,
        ROUND(longitude + (40 / (111 * COS(RADIANS(latitude)))), 5) AS max_lon,
        ROUND(longitude - (40 / (111 * COS(RADIANS(latitude)))), 5) AS min_lon
    FROM firms_data
    WHERE acq_date BETWEEN DATE_SUB(current_date(), 5) AND current_date()
        AND confidence IN ('h', 'n')
),
filtered_sensors AS (
    -- Sensor rows restricted to 'Moderate' / 'High' aq_confidence_level.
    -- aq_confidence_level stays in the DISTINCT key so that the number of rows
    -- produced per sensor is the same as before; the join itself no longer
    -- needs to re-check it.
    SELECT DISTINCT
        s.nn_location_id,
        s.tract_lat,
        s.tract_lon,
        s.aq_confidence_level
    FROM sensors_with_income_levels AS s
    WHERE s.aq_confidence_level IN ('Moderate', 'High')
)
SELECT
    s.nn_location_id,
    f.acq_date
FROM recent_fire_boxes AS f
INNER JOIN filtered_sensors AS s
    ON s.tract_lat BETWEEN f.min_lat AND f.max_lat
    AND s.tract_lon BETWEEN f.min_lon AND f.max_lon;

-- Rebuild the working table of unique location ids used for the API calls.
-- CREATE OR REPLACE overwrites whatever a previous run left in the table.
CREATE OR REPLACE TABLE temp_locations_for_api
USING DELTA
AS
SELECT
    nn_location_id AS location_id
FROM matched_locations_view
GROUP BY nn_location_id;


-- -- Create permanent table for historical reference
-- CREATE OR REPLACE TABLE distinct_locations_historical
-- USING DELTA
-- PARTITIONED BY (query_date)
-- AS
-- SELECT 
--   DISTINCT nn_location_id AS location_id
--   , acq_date AS latest_active
--   , current_date() AS query_date
-- FROM matched_locations_view;

-- Upsert today's snapshot into the permanent historical table.
-- The source collapses the view to one row per location: the most recent
-- acquisition date that matched it, stamped with today's date as query_date.
-- Rows are keyed on (location_id, query_date), so re-running on the same day
-- never inserts a second row for a location.
MERGE INTO distinct_locations_historical AS target
USING (
    SELECT
        nn_location_id AS location_id,
        MAX(acq_date) AS latest_active,
        current_date() AS query_date
    FROM matched_locations_view
    GROUP BY nn_location_id
) AS source
    ON target.location_id = source.location_id
    AND target.query_date = source.query_date
-- A same-day re-run may see newer detections; only ever move latest_active
-- forward so an earlier value is never left stale (and never regresses).
WHEN MATCHED AND source.latest_active > target.latest_active THEN
    UPDATE SET target.latest_active = source.latest_active
WHEN NOT MATCHED THEN
    INSERT (location_id, latest_active, query_date)
    VALUES (source.location_id, source.latest_active, source.query_date);

-- Run Delta OPTIMIZE on the API working table written above.
OPTIMIZE temp_locations_for_api;

-- Run Delta OPTIMIZE on the historical table, Z-ordering its data by location_id.
OPTIMIZE distinct_locations_historical ZORDER BY (location_id);

In [0]:
# %sql
# -- Check for duplicates in temporary API table
# SELECT 
#     location_id,
#     COUNT(*) as count
# FROM temp_locations_for_api
# GROUP BY location_id
# HAVING COUNT(*) > 1;

In [0]:
# %sql
# -- Check for fully duplicated rows in permanent historical table (same location_id, query_date and latest_active)
# SELECT 
#     location_id,
#     query_date,
#     latest_active,
#     COUNT(*) as count
# FROM distinct_locations_historical
# GROUP BY location_id, query_date, latest_active
# HAVING COUNT(*) > 1;

In [0]:
# %sql
# -- Additional check for the permanent table to show all duplicate records
# WITH DuplicateCheck AS (
#     SELECT 
#         location_id,
#         query_date,
#         latest_active,
#         ROW_NUMBER() OVER (
#             PARTITION BY location_id, query_date, latest_active 
#             ORDER BY latest_active DESC
#         ) as row_num
#     FROM distinct_locations_historical)
# SELECT *
# FROM DuplicateCheck
# WHERE row_num > 1;

In [0]:
# %sql
# -- Count total records vs distinct records
# SELECT 
#     COUNT(*) as total_records,
#     COUNT(DISTINCT location_id) as distinct_locations
# FROM temp_locations_for_api;

In [0]:
# %sql
# SELECT 
#     COUNT(*) as total_records,
#     COUNT(DISTINCT (location_id, query_date)) as distinct_combinations
# FROM distinct_locations_historical;

In [0]:
# %sql
# -- Check for specific examples of duplicates
# SELECT 
#     location_id,
#     query_date,
#     latest_active,
#     COUNT(*) as occurrence_count
# FROM distinct_locations_historical
# GROUP BY location_id, query_date, latest_active
# HAVING COUNT(*) > 1;

In [0]:
# %sql
# -- Look at the distribution of records over time
# SELECT 
#     query_date,
#     COUNT(DISTINCT location_id) as unique_locations,
#     COUNT(*) as total_records
# FROM distinct_locations_historical
# GROUP BY query_date
# ORDER BY query_date DESC;

In [0]:
# %sql
# -- Check for any locations that appear multiple times on the same day with different latest_active times
# SELECT 
#     location_id,
#     query_date,
#     COUNT(DISTINCT latest_active) as different_active_times,
#     MIN(latest_active) as earliest_active,
#     MAX(latest_active) as most_recent_active
# FROM distinct_locations_historical
# GROUP BY location_id, query_date
# HAVING COUNT(DISTINCT latest_active) > 1
# ORDER BY query_date DESC, location_id;