In [9]:
import os
from urllib.parse import quote

import geopandas as gpd
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text

In [10]:
# Path handed to load_dotenv() to read the database settings.
PATH_TO_DOT_ENV = "../.env"

# Scheme and host used when the SQLAlchemy connection URL is assembled.
DATABASE_TYPE, DATABASE_HOST = "postgresql", "localhost"

# Table names: fire source table, output table, and subdivision source table.
CNFDB_TABLE_NAME = "F"
FIRE_SUBDIVISION_TABLE = "F_s"
# NOTE: "SUBDIVISON" is misspelled; the name is kept because the query cell
# refers to it under this exact spelling.
SUBDIVISON_TABLE_NAME = "S"

In [11]:
# Read the .env file so its entries are available through the environment.
load_dotenv(PATH_TO_DOT_ENV)

# Each setting is None when the corresponding variable is not defined.
DATABASE_NAME = os.getenv("DATABASE_NAME")
POSTGRES_USER = os.getenv("POSTGRES_USER")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")
POSTGRES_HOST_PORT = os.getenv("POSTGRES_HOST_PORT")
POSTGRES_CONTAINER_PORT = os.getenv("POSTGRES_CONTAINER_PORT")

In [12]:
# Build the SQLAlchemy engine from the settings read out of the .env file.
# Fail early with a readable message if a required setting is absent; otherwise
# the literal text "None" would be interpolated into the connection URL.
required_settings = {
    "POSTGRES_USER": POSTGRES_USER,
    "POSTGRES_PASSWORD": POSTGRES_PASSWORD,
    "POSTGRES_HOST_PORT": POSTGRES_HOST_PORT,
    "DATABASE_NAME": DATABASE_NAME,
}
missing_settings = [name for name, value in required_settings.items() if value is None]
if missing_settings:
    raise RuntimeError(
        f"Missing database settings {missing_settings}; check {PATH_TO_DOT_ENV}"
    )

# Percent-encode the credentials: characters such as "@", ":" or "/" inside a
# user name or password would otherwise be read as URL delimiters.
engine = create_engine(
    f"{DATABASE_TYPE}://{quote(POSTGRES_USER, safe='')}:{quote(POSTGRES_PASSWORD, safe='')}"
    f"@{DATABASE_HOST}:{POSTGRES_HOST_PORT}/{DATABASE_NAME}"
)

In [13]:
# Columns pulled from the fire table, each qualified with the "f" alias.
fire_columns = ("YEAR", "MONTH", "DAY", "REP_DATE", "CALC_HA", "CAUSE", "geometry")
fire_select_list = ", ".join(f'f."{column}"' for column in fire_columns)

query_fire = f'select {fire_select_list} from "{CNFDB_TABLE_NAME}" f'
# The subdivision table is read in full.
query_subdivision = f'select * from "{SUBDIVISON_TABLE_NAME}" cfd'

In [14]:
# Run the fire query and build a GeoDataFrame from its "geometry" column,
# labelling the coordinates as EPSG:4326.
fire_gdf = gpd.read_postgis(query_fire, engine, geom_col="geometry", crs="EPSG:4326")

In [15]:
# Display the fire records that were just loaded.
fire_gdf

Unnamed: 0,YEAR,MONTH,DAY,REP_DATE,CALC_HA,CAUSE,geometry
0,2006,7,3,2006-07-03,115.575054,L,"POLYGON Z ((-121.33054 57.45929 0.00000, -121...."
1,2006,7,2,2006-07-02,5068.434802,L,"POLYGON Z ((-122.94524 57.66044 0.00000, -122...."
2,2006,7,2,2006-07-02,96.007295,L,"POLYGON Z ((-121.47328 57.62710 0.00000, -121...."
3,2006,7,3,2006-07-03,748.427603,L,"POLYGON Z ((-121.88672 57.64982 0.00000, -121...."
4,2006,7,3,2006-07-03,3905.789898,L,"POLYGON Z ((-121.80853 57.01514 0.00000, -121...."
...,...,...,...,...,...,...,...
25592,2006,6,23,2006-06-23,0.854552,L,"POLYGON Z ((-120.38838 57.32001 0.00000, -120...."
25593,2006,7,2,2006-07-02,866.813547,L,"POLYGON Z ((-121.54182 57.25444 0.00000, -121...."
25594,2006,9,7,2006-09-07,14.151098,L,"POLYGON Z ((-121.52591 57.25729 0.00000, -121...."
25595,2006,7,2,2006-07-02,3258.246480,L,"POLYGON Z ((-121.89852 57.40242 0.00000, -121...."


In [16]:
# Run the subdivision query with the same geometry column and CRS label
# that are used for the fire layer.
division_read_options = {"geom_col": "geometry", "crs": "EPSG:4326"}
division_gdf = gpd.read_postgis(query_subdivision, engine, **division_read_options)

In [17]:
# Display the subdivision polygons that were just loaded.
division_gdf

Unnamed: 0,cid,geometry
0,0,"MULTIPOLYGON (((-139.53267 69.53197, -139.5336..."
1,1,"MULTIPOLYGON (((-59.17563 47.55863, -59.17579 ..."
2,2,"MULTIPOLYGON (((-112.70518 67.67319, -112.7050..."
3,3,"MULTIPOLYGON (((-82.52584 42.47577, -82.52580 ..."
4,4,"POLYGON ((-112.31319 55.39631, -112.31267 55.3..."
...,...,...
62,62,"POLYGON ((-121.20810 50.56893, -121.20824 50.5..."
63,63,"POLYGON ((-118.79457 54.58827, -118.74824 54.5..."
64,64,"MULTIPOLYGON (((-121.00702 49.65631, -121.0066..."
65,65,"MULTIPOLYGON (((-80.76446 45.82386, -80.76447 ..."


In [18]:
# Inner spatial join: a fire row is kept once for each subdivision polygon
# that its geometry lies within. Fires that are not within any subdivision
# are dropped by the inner join.
fires_joined = gpd.sjoin(fire_gdf, division_gdf, how="inner", predicate="within")

# The join adds the matching subdivision's index as "index_right"; drop it.
fire_in_subdivision_gdf = fires_joined.drop(columns="index_right")

In [19]:
# Map the source column names to the names used in the output table.
column_renames = {
    "YEAR": "year",
    "MONTH": "month",
    "DAY": "day",
    "REP_DATE": "start_date",
    "CALC_HA": "area_burnt_ha",
    "CAUSE": "cause",
    "cid": "division_id",
}
fire_in_subdivision_gdf = fire_in_subdivision_gdf.rename(columns=column_renames)

In [20]:
# Display the joined and renamed fire records.
fire_in_subdivision_gdf

Unnamed: 0,year,month,day,start_date,area_burnt_ha,cause,geometry,division_id
0,2006,7,3,2006-07-03,115.575054,L,"POLYGON Z ((-121.33054 57.45929 0.00000, -121....",5
1,2006,7,2,2006-07-02,5068.434802,L,"POLYGON Z ((-122.94524 57.66044 0.00000, -122....",5
2,2006,7,2,2006-07-02,96.007295,L,"POLYGON Z ((-121.47328 57.62710 0.00000, -121....",5
3,2006,7,3,2006-07-03,748.427603,L,"POLYGON Z ((-121.88672 57.64982 0.00000, -121....",5
4,2006,7,3,2006-07-03,3905.789898,L,"POLYGON Z ((-121.80853 57.01514 0.00000, -121....",5
...,...,...,...,...,...,...,...,...
24553,1996,6,11,1996-06-11,594.510064,L,"POLYGON Z ((-66.53226 46.98190 0.00000, -66.53...",40
24616,2007,6,20,2007-06-20,8.810532,L,"POLYGON Z ((-66.84950 47.18863 0.00000, -66.84...",40
24564,1998,6,25,1998-06-25,4782.265756,L,"MULTIPOLYGON Z (((-54.53030 48.63778 0.00000, ...",1
24566,1999,6,26,1999-06-26,3850.219982,L,"POLYGON Z ((-56.07293 48.97471 0.00000, -56.07...",1


In [21]:
# Collapse the joined rows to one row per (division_id, start_date,
# area_burnt_ha, cause); for every other column the first non-null value in
# each group is kept. These are the columns later declared as the primary key.
key_columns = ["division_id", "start_date", "area_burnt_ha", "cause"]
fire_in_subdivision_gdf = (
    fire_in_subdivision_gdf.groupby(key_columns).first().reset_index()
)

# The aggregation can hand back a frame without CRS metadata: the recorded
# output of the to_postgis cell shows a warning raised from
# `_get_srid_from_crs`, which points to a missing CRS. Re-attach the CRS of
# the source fire layer so the table is written with its SRID.
if fire_in_subdivision_gdf.crs is None and fire_gdf.crs is not None:
    fire_in_subdivision_gdf = fire_in_subdivision_gdf.set_crs(fire_gdf.crs)

In [22]:
# Remove rows that are exact duplicates across all columns. The result is
# reassigned instead of using inplace=True, so the cell does not mutate the
# frame in place.
fire_in_subdivision_gdf = fire_in_subdivision_gdf.drop_duplicates()

In [23]:
# Write the result to the output table, replacing any existing table of that
# name; the DataFrame index is not stored.
fire_in_subdivision_gdf.to_postgis(
    name=FIRE_SUBDIVISION_TABLE,
    con=engine,
    if_exists="replace",
    index=False,
)

  srid = _get_srid_from_crs(gdf)


In [24]:
# Declare the composite primary key on the freshly written table.
# engine.begin() opens a transaction that is committed when the block exits
# without error, and text() wraps the raw SQL: SQLAlchemy 2.0 no longer
# accepts plain strings in execute() and no longer autocommits DDL.
with engine.begin() as con:
    con.execute(
        text(f'ALTER TABLE "{FIRE_SUBDIVISION_TABLE}" ADD PRIMARY KEY ("division_id","start_date","area_burnt_ha","cause");')
    )

In [25]:
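# # Disabled: this operation takes too long to run.
# # NOTE: `fire_not_in_sub_division` is not defined in the cells shown here.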
# fire_overlaps_subdivision_gdf = gpd.sjoin(
#     fire_not_in_sub_division,
#     division_gdf,
#     how="inner",
#     predicate="overlaps").drop("index_right", axis=1)

In [26]:
# Display the grouped, de-duplicated frame that was written to the database.
fire_in_subdivision_gdf

Unnamed: 0,division_id,start_date,area_burnt_ha,cause,year,month,day,geometry
0,0,1960-05-10,6422.863612,L,1960,5,10,"POLYGON Z ((-136.74856 66.77644 0.00000, -136...."
1,0,1960-06-12,679.814663,L,1960,6,12,"POLYGON Z ((-136.63157 65.80181 0.00000, -136...."
2,0,1961-07-20,106.663293,L,1961,7,20,"POLYGON Z ((-126.54247 60.54400 0.00000, -126...."
3,0,1961-07-20,7583.407826,L,1961,7,20,"POLYGON Z ((-125.25890 60.55448 0.00000, -125...."
4,0,1961-07-20,22645.797871,L,1961,7,20,"MULTIPOLYGON Z (((-126.15643 60.48821 0.00000,..."
...,...,...,...,...,...,...,...,...
24979,66,2014-07-30,0.049777,L,2014,7,30,"POLYGON Z ((-115.69999 51.69989 0.00000, -115...."
24980,66,2014-07-31,0.163096,L,2014,7,31,"POLYGON Z ((-115.41916 51.25769 0.00000, -115...."
24981,66,2014-07-31,0.998153,L,2014,7,31,"POLYGON Z ((-115.39989 51.14951 0.00000, -115...."
24982,66,2014-11-07,2287.772307,L,2014,11,7,"MULTIPOLYGON Z (((-116.71528 51.96098 0.00000,..."


: 