In [1]:
# ! pip install scipy shapely geopandas psycopg2-binary geoalchemy2 python-dotenv

# Import Block

In [2]:
import os
import pandas as pd
import geopandas as gpd
from copy import deepcopy
from dotenv import load_dotenv

# # This is not always giving the colosest possible station for some reason there is code to now manually retrive the distances
# from scipy.spatial import KDTree

import warnings
warnings.filterwarnings("ignore")

import sys
sys.path.append("../src")

from utils.Database import Database

# Load Environment

In [3]:
load_dotenv(".env")

TABLE_FIRE = os.getenv("TABLE_FIRE")
TABLE_WEATHER_METADATA = os.getenv("TABLE_WEATHER_METADATA")
TABLE_FIRE_NEAREST_WEATHER_METADATA = os.getenv("TABLE_FIRE_NEAREST_WEATHER_METADATA")

# Constants

In [4]:
GEOMETRY_COLUMNS = ['geometry_fire', 'geometry_station']

FIRE_IMPORTANT_COLUMNS = ['YEAR', 'MONTH', 'DAY', 'REP_DATE', 'CALC_HA', 'CAUSE', 'SRC_AGY2', 'geometry']

## Helper Functions

In [5]:
def filter_fire_data_by_year(
    year:int,
    fire_gdf:gpd.GeoDataFrame,
    filter_columns:list = FIRE_IMPORTANT_COLUMNS 
):  
    # filter by year
    filtered_gdf =  fire_gdf[
        fire_gdf["YEAR"] == year
    ]

    # filter columns
    filtered_gdf = filtered_gdf[filter_columns]

    return deepcopy(filtered_gdf)

# # Sample rum 
# filter_fire_data_by_year(
#     year = 2011,
#     fire_gdf = fire_gdf
# )

In [6]:
def filter_station_by_operation_year(
    year:int,
    station_meatdata_gdf:gpd.GeoDataFrame,
    start_yr_col_name:str = 'FIRST_YR',
    end_yr_col_name:str = 'LAST_YR'
):
    filtered_station_gdf = station_meatdata_gdf[
        # The years are inclusive since the daly reading for each staion start at the first day of the year
        (station_meatdata_gdf[start_yr_col_name] <= year) & (station_meatdata_gdf[end_yr_col_name] >= year)
    ] 
    return deepcopy(filtered_station_gdf)

# # Sample Run
# filter_station_by_operation_year(
#     year = 1999,
#     station_meatdata_gdf = weather_meatdata_gdf
# )

In [7]:
def get_nearest_station_to_fire(
    year:int,
    fire_gdf:gpd.GeoDataFrame,
    station_meatdata_gdf:gpd.GeoDataFrame,
    distance_in_meters:bool = True,
):    
    # get fires in the year
    yr_fire_gdf = filter_fire_data_by_year(
        year = year,
        fire_gdf = fire_gdf
    )

    # set CRS to "EPSG:3857" to get distance in meters
    if distance_in_meters == True:
        yr_fire_gdf = yr_fire_gdf.to_crs("EPSG:3857")

    # get operational stations
    operational_stations_gdf = filter_station_by_operation_year(
        year = year,
        station_meatdata_gdf = station_meatdata_gdf
    )

    # skip if ther are no sations available
    if(len(operational_stations_gdf) == 0):
        del operational_stations_gdf
        del yr_fire_gdf
        # del fire_cords
        return None

    # set CRS to "EPSG:3857" to get distance in meters
    if distance_in_meters == True:
        operational_stations_gdf = operational_stations_gdf.to_crs("EPSG:3857")

    # get the distancec to each station
    fire_to_Station_distance = yr_fire_gdf.centroid.apply(
        lambda fire_cord: fire_cord.distance(
            operational_stations_gdf.geometry
        )
    )

    # get the cloeasest station
    yr_fire_gdf['DISTANCE'] = fire_to_Station_distance.min(
        axis = 1
    )

    # get the index of cloeasest station
    yr_fire_gdf['STATION_INDEX'] = fire_to_Station_distance.idxmin( 
        axis = 1
    )
    del fire_to_Station_distance

    # set CRS to default if CRS was set in meters
    if distance_in_meters == True:
        yr_fire_gdf = yr_fire_gdf.to_crs("EPSG:4326")
        operational_stations_gdf = operational_stations_gdf.to_crs("EPSG:4326")

    # add station data
    yr_fire_gdf = yr_fire_gdf.merge(
        operational_stations_gdf, 
        left_on="STATION_INDEX", 
        right_index=True, 
        suffixes=("_fire", "_station")
    )
    del operational_stations_gdf

    return yr_fire_gdf

# # Sample RUn
# get_nearest_station_to_fire(
#     year = 2011,
#     fire_gdf = fire_gdf,
#     station_meatdata_gdf = weather_meatdata_gdf,
#     distance_in_meters = False,
# )

In [8]:
def set_geometry_for_all_gdf(
    data:gpd.GeoDataFrame,
    in_meters:bool = False,
    data_geometry_columns:list = GEOMETRY_COLUMNS
):
    # set crs of all geometry columns
    for col in data_geometry_columns:
        # set crs of line path 
        data[col] = data[col].to_crs(
            "EPSG:3857" if in_meters else "EPSG:4326",
        )

# Establish Database Connection

In [9]:
db = Database()

Connection Established!!!
	Engine(postgresql://wireaiadmin:***@localhost:5434/weather_db)


# Read Data

In [10]:
# Get Fire Boundaries
fire_gdf = gpd.read_postgis(
    sql = f'SELECT * FROM {TABLE_FIRE} as f order by f."YEAR";',
    con = db.connection,
    geom_col = "geometry"
)

# Get Weather station Cordimates
weather_meatdata_gdf = gpd.read_postgis(
    sql = f'SELECT * FROM {TABLE_WEATHER_METADATA};',
    con = db.connection,
    geom_col = "geometry"
)

# Data Pre-Processing

In [11]:
# get the first known year of weather data
weather_start_yr = weather_meatdata_gdf['FIRST_YR'].min()

# get unique fire years
# Note: alternatelvely you can just loop over all years since ther is no year where there was no fire.
#       I want this code to be usable for any country hence chose to go by unique year.
fire_yrs = fire_gdf['YEAR'].unique()
fire_start_yr = fire_yrs.min()

# update the fires years to only have the years with relevant data
fire_yrs = fire_yrs[fire_yrs > weather_start_yr]
del weather_start_yr

# init list of fire GeoDataFrames by year
fire_station_meta_gdfs = []

for year in fire_yrs:
    # merge with the nearest station
    fire_station_meta_gdf = get_nearest_station_to_fire(
        year = year,
        fire_gdf = fire_gdf,
        station_meatdata_gdf = weather_meatdata_gdf,
        distance_in_meters = True,
    )

    if fire_station_meta_gdf is None:
        continue
    
    # add to list
    fire_station_meta_gdfs.append(fire_station_meta_gdf)
    del fire_station_meta_gdf

# conbine the list into a dataframe
fire_station_meta_gdf = pd.concat(fire_station_meta_gdfs)

# unify geometry CRS
set_geometry_for_all_gdf(
    data = fire_station_meta_gdf,
    in_meters = False,
    data_geometry_columns = GEOMETRY_COLUMNS
)

# remane fire_goemetry to geometry
fire_station_meta_gdf.rename(
    {
        "geometry_fire": "geometry"
    }, 
    axis = 1,
    inplace = True
)

# set_geometry
fire_station_meta_gdf.set_geometry(
    "geometry",
    inplace = True
)

assert fire_station_meta_gdf.crs == fire_gdf.crs

In [12]:
# send data to db
db.send_gdf_to_db(
    gdf = fire_station_meta_gdf[[
        'REP_DATE', 
        'FIRST_DATE', 
        'LAST_DATE', 
        'CALC_HA', 
        'CAUSE', 
        'CLIMATE_ID', 
        'DISTANCE', 
        'PROV', 
        'UTC_OFFSET', 
        'ELEV_IN_M',  
        'geometry', 
    ]],
    table_name = TABLE_FIRE_NEAREST_WEATHER_METADATA,
    if_exists = 'replace',
    index = False,
)

In [None]:
# add keys to data for faster retrival

primary_key_statement = f"""ALTER TABLE "{TABLE_FIRE_NEAREST_WEATHER_METADATA}" ADD PRIMARY KEY ( "REP_DATE", "CALC_HA", "CAUSE" );"""
db.execute_sql(primary_key_statement)
alter_statement = f"""CREATE INDEX fire_weather_readings_index_station_id ON "{TABLE_FIRE_NEAREST_WEATHER_METADATA}" ( "CLIMATE_ID" );"""
db.execute_sql(alter_statement)
alter_statement = f"""CREATE INDEX fire_weather_readings_index_fire_date ON "{TABLE_FIRE_NEAREST_WEATHER_METADATA}" ( "REP_DATE" );"""
db.execute_sql(alter_statement)
alter_statement = f"""CREATE INDEX fire_weather_readings_index_area_burnt ON "{TABLE_FIRE_NEAREST_WEATHER_METADATA}" ( "CALC_HA" );"""
db.execute_sql(alter_statement)
alter_statement = f"""CREATE INDEX fire_weather_readings_index_cause ON "{TABLE_FIRE_NEAREST_WEATHER_METADATA}" ( "CAUSE" );"""
db.execute_sql(alter_statement)


Execution started --> CREATE INDEX fire_weather_readings_index_station_id ON "fire_nearest_weather_metadata" ( "CLIMATE_ID" );
Exectution completed --> CREATE INDEX fire_weather_readings_index_station_id ON "fire_nearest_weather_metadata" ( "CLIMATE_ID" );
Execution started --> CREATE INDEX fire_weather_readings_index_fire_date ON "fire_nearest_weather_metadata" ( "REP_DATE" );
Exectution completed --> CREATE INDEX fire_weather_readings_index_fire_date ON "fire_nearest_weather_metadata" ( "REP_DATE" );
Execution started --> CREATE INDEX fire_weather_readings_index_area_burnt ON "fire_nearest_weather_metadata" ( "CALC_HA" );
Exectution completed --> CREATE INDEX fire_weather_readings_index_area_burnt ON "fire_nearest_weather_metadata" ( "CALC_HA" );


# Test Read Data

In [14]:
gpd.read_postgis(
    sql = f"""SELECT * from "{TABLE_FIRE_NEAREST_WEATHER_METADATA}"; """,
    con = db.connection,
    geom_col = 'geometry',
)

Unnamed: 0,REP_DATE,FIRST_DATE,LAST_DATE,CALC_HA,CAUSE,CLIMATE_ID,DISTANCE,PROV,UTC_OFFSET,ELEV_IN_M,geometry
0,1999-05-08,1998-01-01,2017-12-31,87.510765,L,7080468,41849.916203,QC,-5,181.4,"POLYGON Z ((-79.3982 46.85921 0, -79.39835 46...."
1,1999-08-14,1998-01-01,2017-12-31,711.829603,L,6075420,257816.672068,ON,-5,9.1,"POLYGON Z ((-82.94662 51.34879 0, -82.95385 51..."
2,1999-08-29,1998-01-01,2017-12-31,1529.722973,L,6075420,382851.735776,ON,-5,9.1,"POLYGON Z ((-83.70413 52.22637 0, -83.70561 52..."
3,1999-06-23,1998-01-01,2017-12-31,152.116662,H,6106001,52838.945430,ON,-5,114.9,"POLYGON Z ((-76.13139 45.26008 0, -76.13073 45..."
4,1999-04-30,1998-01-01,2017-12-31,48602.376269,H,6048262,170235.249319,ON,-5,199.3,"MULTIPOLYGON Z (((-88.89922 49.37552 0, -88.89..."
...,...,...,...,...,...,...,...,...,...,...,...
19828,2017-08-08,1998-01-01,2017-12-31,535.083751,L,6014353,79442.762054,ON,-5,253.4,"MULTIPOLYGON Z (((-87.51317 52.53632 0, -87.51..."
19829,2017-07-25,1998-01-01,2017-12-31,58.946708,L,6014353,443814.271373,ON,-5,253.4,"POLYGON Z ((-83.99349 52.58657 0, -83.99378 52..."
19830,2017-08-11,1998-01-01,2017-12-31,85.047414,L,6010735,174730.541776,ON,-6,222.5,"POLYGON Z ((-89.75531 52.8835 0, -89.75758 52...."
19831,2017-07-29,1998-01-01,2017-12-31,166.976104,L,6014353,197633.516985,ON,-5,253.4,"MULTIPOLYGON Z (((-87.45332 51.13371 0, -87.45..."
