In [1]:
import os
from urllib.parse import quote_plus

import geopandas as gpd
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text

In [2]:
# Path to the .env file passed to load_dotenv in a later cell.
PATH_TO_DOT_ENV = "../../.env"

# Parts of the SQLAlchemy connection URL that are fixed here rather than read from the environment.
DATABASE_TYPE = "postgresql"
DATABASE_HOST = "localhost"

# Names of the database tables used by the queries and writes in this notebook.
CNFDB_TABLE_NAME = "F"  # table the fire query selects from
FIRE_SUBDIVISION_TABLE = "F_s"  # table the joined result is written to
SUBDIVISON_TABLE_NAME = "S"  # table the subdivision query selects from; NOTE(review): identifier is spelled "SUBDIVISON" and later cells reference it under that spelling

In [3]:
# Load the variables defined in the .env file into the process environment.
load_dotenv(PATH_TO_DOT_ENV)

# Settings the connection URL cannot be built without. Fail fast with an explicit
# message instead of letting a missing value be interpolated as the text "None".
_REQUIRED_ENV_VARS = (
    "DATABASE_NAME",
    "POSTGRES_USER",
    "POSTGRES_PASSWORD",
    "POSTGRES_HOST_PORT",
)
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if os.environ.get(name) is None]
if _missing_env_vars:
    raise RuntimeError(
        f"Missing environment variable(s): {', '.join(_missing_env_vars)}. "
        f"Define them in {PATH_TO_DOT_ENV} or in the process environment."
    )

DATABASE_NAME = os.environ["DATABASE_NAME"]
POSTGRES_USER = os.environ["POSTGRES_USER"]
POSTGRES_PASSWORD = os.environ["POSTGRES_PASSWORD"]
POSTGRES_HOST_PORT = os.environ["POSTGRES_HOST_PORT"]
# Not referenced by the connection URL built in this notebook, so it stays optional
# (None when the variable is not defined).
POSTGRES_CONTAINER_PORT = os.environ.get("POSTGRES_CONTAINER_PORT")

In [4]:
# Build the SQLAlchemy engine for the PostgreSQL database.
# The user name and password are percent-encoded so that characters such as
# "@", ":", "/" or "#" in the credentials cannot corrupt the connection URL.
engine = create_engine(
    f"{DATABASE_TYPE}://{quote_plus(POSTGRES_USER)}:{quote_plus(POSTGRES_PASSWORD)}"
    f"@{DATABASE_HOST}:{POSTGRES_HOST_PORT}/{DATABASE_NAME}"
)

In [5]:
# Columns selected from the fire table, each qualified with the table alias "f".
fire_columns = ["YEAR", "MONTH", "DAY", "REP_DATE", "CALC_HA", "CAUSE", "geometry"]
fire_select_list = ", ".join(f'f."{column}"' for column in fire_columns)

# SQL text for the two source tables; identifiers are double-quoted to keep their case.
query_fire = f'select {fire_select_list} from "{CNFDB_TABLE_NAME}" f'
query_subdivision = f'select * from "{SUBDIVISON_TABLE_NAME}" cfd'

In [6]:
# Run the fire query and build a GeoDataFrame from its "geometry" column,
# declaring the coordinates as EPSG:4326.
fire_gdf = gpd.read_postgis(query_fire, engine, geom_col="geometry", crs="EPSG:4326")

In [7]:
# Display the fire GeoDataFrame returned by the fire query.
fire_gdf

Unnamed: 0,YEAR,MONTH,DAY,REP_DATE,CALC_HA,CAUSE,geometry
0,2006,7,3,2006-07-03,115.575054,L,"POLYGON Z ((-121.33054 57.45929 0.00000, -121...."
1,2006,7,2,2006-07-02,5068.434802,L,"POLYGON Z ((-122.94524 57.66044 0.00000, -122...."
2,2006,7,2,2006-07-02,96.007295,L,"POLYGON Z ((-121.47328 57.62710 0.00000, -121...."
3,2006,7,3,2006-07-03,748.427603,L,"POLYGON Z ((-121.88672 57.64982 0.00000, -121...."
4,2006,7,3,2006-07-03,3905.789898,L,"POLYGON Z ((-121.80853 57.01514 0.00000, -121...."
...,...,...,...,...,...,...,...
25592,2006,6,23,2006-06-23,0.854552,L,"POLYGON Z ((-120.38838 57.32001 0.00000, -120...."
25593,2006,7,2,2006-07-02,866.813547,L,"POLYGON Z ((-121.54182 57.25444 0.00000, -121...."
25594,2006,9,7,2006-09-07,14.151098,L,"POLYGON Z ((-121.52591 57.25729 0.00000, -121...."
25595,2006,7,2,2006-07-02,3258.246480,L,"POLYGON Z ((-121.89852 57.40242 0.00000, -121...."


In [8]:
# Run the subdivision query and build a GeoDataFrame from its "geometry" column,
# declaring the coordinates as EPSG:4326.
division_gdf = gpd.read_postgis(query_subdivision, engine, geom_col="geometry", crs="EPSG:4326")

In [9]:
# Display the subdivision GeoDataFrame returned by the subdivision query.
division_gdf

Unnamed: 0,cid,geometry
0,0,"MULTIPOLYGON (((-139.53267 69.53197, -139.5336..."
1,5,"POLYGON ((-127.71250 60.00003, -127.70000 60.0..."
2,6,"MULTIPOLYGON (((-79.55767 51.52529, -79.55795 ..."
3,7,"MULTIPOLYGON (((-94.82510 59.99778, -94.82801 ..."
4,16,"MULTIPOLYGON (((-135.34592 68.69194, -135.3470..."
5,18,"POLYGON ((-124.35098 54.26470, -124.34870 54.2..."
6,25,"POLYGON ((-115.86210 58.76655, -115.44245 58.7..."
7,27,"MULTIPOLYGON (((-132.99704 53.53886, -132.9969..."
8,29,"MULTIPOLYGON (((-95.06221 49.36099, -95.06249 ..."
9,51,"POLYGON ((-114.00008 59.37318, -114.00006 59.2..."


In [10]:
# Inner spatial join: keep each fire whose geometry is within a subdivision geometry
# and attach that subdivision's columns. The "index_right" column added by the join
# is dropped afterwards.
fire_in_subdivision_gdf = (
    gpd.sjoin(fire_gdf, division_gdf, how="inner", predicate="within")
    .drop(columns="index_right")
)

In [11]:
# Map the source column names to the names used in the output table.
renamed_columns = {
    "YEAR": "year",
    "MONTH": "month",
    "DAY": "day",
    "REP_DATE": "start_date",
    "CALC_HA": "area_burnt_ha",
    "CAUSE": "cause",
    "cid": "division_id",
}
fire_in_subdivision_gdf = fire_in_subdivision_gdf.rename(columns=renamed_columns)

In [12]:
# Display the joined and renamed frame.
fire_in_subdivision_gdf

Unnamed: 0,year,month,day,start_date,area_burnt_ha,cause,geometry,division_id
0,2006,7,3,2006-07-03,115.575054,L,"POLYGON Z ((-121.33054 57.45929 0.00000, -121....",5
1,2006,7,2,2006-07-02,5068.434802,L,"POLYGON Z ((-122.94524 57.66044 0.00000, -122....",5
2,2006,7,2,2006-07-02,96.007295,L,"POLYGON Z ((-121.47328 57.62710 0.00000, -121....",5
3,2006,7,3,2006-07-03,748.427603,L,"POLYGON Z ((-121.88672 57.64982 0.00000, -121....",5
4,2006,7,3,2006-07-03,3905.789898,L,"POLYGON Z ((-121.80853 57.01514 0.00000, -121....",5
...,...,...,...,...,...,...,...,...
25420,1986,7,17,1986-07-17,78.981209,L,"POLYGON Z ((-127.59392 62.32057 0.00000, -127....",16
25421,1985,7,29,1985-07-29,42.102471,L,"POLYGON Z ((-126.12909 61.15804 0.00000, -126....",16
25422,1984,8,9,1984-08-09,256.750436,L,"POLYGON Z ((-126.73040 60.91676 0.00000, -126....",16
25423,1992,7,1,1992-07-01,654.724359,L,"MULTIPOLYGON Z (((-128.73015 62.52788 0.00000,...",16


In [13]:
# Collapse rows that share the same (division_id, start_date, area_burnt_ha, cause)
# combination, keeping the first non-null value of every other column per group.
# The grouped result can come back with no CRS on its geometry column, in which case
# to_postgis would write the geometries without an SRID. The CRS is therefore captured
# before grouping and re-attached to the result.
source_crs = fire_in_subdivision_gdf.crs
group_key_columns = ["division_id", "start_date", "area_burnt_ha", "cause"]
grouped_fires = fire_in_subdivision_gdf.groupby(group_key_columns).first().reset_index()
fire_in_subdivision_gdf = gpd.GeoDataFrame(grouped_fires, geometry="geometry", crs=source_crs)

In [14]:
# Remove rows that are exact duplicates of an earlier row (first occurrence is kept).
fire_in_subdivision_gdf = fire_in_subdivision_gdf.drop_duplicates()

In [15]:
# Write the result to the output table, replacing the table if it already exists;
# the DataFrame index is not stored as a column.
fire_in_subdivision_gdf.to_postgis(FIRE_SUBDIVISION_TABLE, engine, if_exists="replace", index=False)

  srid = _get_srid_from_crs(gdf)


In [16]:
# Declare the composite primary key on the freshly written table.
# The statement is wrapped in text() because SQLAlchemy 2.x no longer accepts a raw
# SQL string in Connection.execute, and it runs inside engine.begin() so the DDL is
# committed when the block exits (and rolled back if it raises).
add_primary_key_sql = text(
    f'ALTER TABLE "{FIRE_SUBDIVISION_TABLE}" '
    'ADD PRIMARY KEY ("division_id","start_date","area_burnt_ha","cause");'
)
with engine.begin() as con:
    con.execute(add_primary_key_sql)

In [25]:
# # Disabled: this "overlaps" spatial join takes too long to run.
# fire_overlaps_subdivision_gdf = gpd.sjoin(
#     fire_not_in_sub_division,
#     division_gdf,
#     how="inner",
#     predicate="overlaps").drop("index_right", axis=1)

In [17]:
# Display the grouped, de-duplicated frame that was written to the database.
fire_in_subdivision_gdf

Unnamed: 0,division_id,start_date,area_burnt_ha,cause,year,month,day,geometry
0,0,1960-05-10,6422.863612,L,1960,5,10,"POLYGON Z ((-136.74856 66.77644 0.00000, -136...."
1,0,1960-06-12,679.814663,L,1960,6,12,"POLYGON Z ((-136.63157 65.80181 0.00000, -136...."
2,0,1961-07-20,106.663293,L,1961,7,20,"POLYGON Z ((-126.54247 60.54400 0.00000, -126...."
3,0,1961-07-20,7583.407826,L,1961,7,20,"POLYGON Z ((-125.25890 60.55448 0.00000, -125...."
4,0,1961-07-20,22645.797871,L,1961,7,20,"MULTIPOLYGON Z (((-126.15643 60.48821 0.00000,..."
...,...,...,...,...,...,...,...,...
25015,73,2019-07-26,42.128584,L,2019,7,26,"POLYGON Z ((-103.21818 58.63746 0.00000, -103...."
25016,73,2019-07-27,68.698008,L,2019,7,27,"POLYGON Z ((-104.69448 56.97894 0.00000, -104...."
25017,73,2019-07-29,179.067213,L,2019,7,29,"POLYGON Z ((-104.02730 58.87546 0.00000, -104...."
25018,73,2020-06-28,133.992125,L,2020,6,28,"MULTIPOLYGON Z (((-106.59045 58.58776 0.00000,..."
