In [1]:
import os
from urllib.parse import quote

import geopandas as gpd
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy import text

In [2]:
# --- Notebook configuration -------------------------------------------------

# Table names (always double-quoted when interpolated into SQL below).
CNFDB_TABLE_NAME = "F"            # source table queried for fire polygons
SUBDIVISON_TABLE_NAME = "S"       # source table queried for subdivisions
                                  # NOTE: "SUBDIVISON" is misspelled, but the
                                  # name is kept because the query cell uses it.
FIRE_SUBDIVISION_TABLE = "F_s"    # output table written by to_postgis

# Database connection pieces that are not secret; they form the scheme and
# host of the SQLAlchemy URL.
DATABASE_TYPE = "postgresql"
DATABASE_HOST = "localhost"

# Location of the .env file holding the remaining connection settings,
# relative to the notebook's working directory.
PATH_TO_DOT_ENV = "../../.env"

In [3]:
# Load the .env file, then read the PostgreSQL connection settings from the
# process environment.
load_dotenv(PATH_TO_DOT_ENV)

DATABASE_NAME = os.environ.get("DATABASE_NAME")
POSTGRES_USER = os.environ.get("POSTGRES_USER")
POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD")
POSTGRES_HOST_PORT = os.environ.get("POSTGRES_HOST_PORT")
# Read as well, but not referenced by any other cell visible in this notebook,
# so it is deliberately not treated as required below.
POSTGRES_CONTAINER_PORT = os.environ.get("POSTGRES_CONTAINER_PORT")

# Fail fast: os.environ.get returns None for an unset variable, and None would
# otherwise be formatted into the connection URL as the literal text "None",
# producing a confusing connection error much later.
_required_settings = {
    "DATABASE_NAME": DATABASE_NAME,
    "POSTGRES_USER": POSTGRES_USER,
    "POSTGRES_PASSWORD": POSTGRES_PASSWORD,
    "POSTGRES_HOST_PORT": POSTGRES_HOST_PORT,
}
_missing_settings = [key for key, value in _required_settings.items() if value is None]
if _missing_settings:
    raise RuntimeError(
        f"Missing required setting(s) {_missing_settings}; "
        f"checked {PATH_TO_DOT_ENV!r} and the process environment."
    )

In [4]:
# Build the SQLAlchemy engine shared by every database read/write below.
# The user name and password are percent-encoded so that characters that are
# reserved in URLs (for example "@", ":", "/" or "+") cannot corrupt the
# connection URL; SQLAlchemy decodes them again when parsing it.
engine = create_engine(
    f"{DATABASE_TYPE}://{quote(POSTGRES_USER, safe='')}:{quote(POSTGRES_PASSWORD, safe='')}"
    f"@{DATABASE_HOST}:{POSTGRES_HOST_PORT}/{DATABASE_NAME}"
)

In [5]:
# SQL for the fire polygons: only the date, size, cause and geometry columns
# are selected from the fire table.
query_fire = (
    'select f."YEAR", f."MONTH", f."DAY", f."REP_DATE", '
    'f."CALC_HA", f."CAUSE", f."geometry" '
    f'from "{CNFDB_TABLE_NAME}" f'
)

# SQL for the subdivisions: every column of the subdivision table.
query_subdivision = f'select * from "{SUBDIVISON_TABLE_NAME}" cfd'

In [6]:
# Read the fire polygons into a GeoDataFrame, using the "geometry" column as
# the active geometry and labelling it as EPSG:4326.
fire_gdf = gpd.read_postgis(query_fire, engine, geom_col="geometry", crs="EPSG:4326")

In [7]:
# Preview the fire polygons loaded from the database.
fire_gdf

Unnamed: 0,YEAR,MONTH,DAY,REP_DATE,CALC_HA,CAUSE,geometry
0,2008,8,18,2008-08-18,2.228881,L,"POLYGON Z ((-115.87804 50.17170 0.00000, -115...."
1,2008,8,18,2008-08-18,8.159434,L,"POLYGON Z ((-116.53969 50.80429 0.00000, -116...."
2,2008,8,16,2008-08-16,5.643911,L,"POLYGON Z ((-118.06441 51.84440 0.00000, -118...."
3,2008,7,10,2008-07-10,1.730790,L,"POLYGON Z ((-117.60696 49.01132 0.00000, -117...."
4,2008,7,1,2008-07-01,1.346359,L,"POLYGON Z ((-117.95915 50.20801 0.00000, -117...."
...,...,...,...,...,...,...,...
25592,2008,8,17,2008-08-17,2.689829,L,"POLYGON Z ((-115.66692 50.47834 0.00000, -115...."
25593,2008,8,18,2008-08-18,0.451117,L,"POLYGON Z ((-116.35879 50.94432 0.00000, -116...."
25594,2008,8,17,2008-08-17,16.384738,L,"POLYGON Z ((-115.68441 50.20491 0.00000, -115...."
25595,2008,8,17,2008-08-17,0.687931,L,"POLYGON Z ((-115.68427 50.20604 0.00000, -115...."


In [8]:
# Read the subdivision polygons into a GeoDataFrame, with the same geometry
# column name and CRS label (EPSG:4326) as the fire frame.
division_gdf = gpd.read_postgis(query_subdivision, engine, geom_col="geometry", crs="EPSG:4326")

In [9]:
# Preview the subdivisions: one "cid" identifier and one geometry per row.
division_gdf

Unnamed: 0,cid,geometry
0,0,"MULTIPOLYGON (((-139.53267 69.53197, -139.5336..."
1,5,"POLYGON ((-127.71250 60.00003, -127.70000 60.0..."
2,6,"MULTIPOLYGON (((-79.55767 51.52529, -79.55795 ..."
3,7,"MULTIPOLYGON (((-94.82510 59.99778, -94.82801 ..."
4,16,"MULTIPOLYGON (((-135.34592 68.69194, -135.3470..."
5,18,"POLYGON ((-124.35098 54.26470, -124.34870 54.2..."
6,25,"POLYGON ((-115.86210 58.76655, -115.44245 58.7..."
7,27,"MULTIPOLYGON (((-132.99704 53.53886, -132.9969..."
8,29,"MULTIPOLYGON (((-95.06221 49.36099, -95.06249 ..."
9,51,"POLYGON ((-114.00008 59.37318, -114.00006 59.2..."


In [10]:
# Spatially join each fire to the subdivision it lies within. The inner join
# keeps only fires that matched a subdivision under the "within" predicate.
# The helper "index_right" column added by sjoin is not needed afterwards.
fire_in_subdivision_gdf = (
    gpd.sjoin(fire_gdf, division_gdf, how="inner", predicate="within")
    .drop(columns="index_right")
)

In [11]:
# Switch to the lower-case column names used by the output table; "cid"
# (the subdivision identifier brought in by the join) becomes "division_id".
fire_in_subdivision_gdf = fire_in_subdivision_gdf.rename(
    columns={
        "YEAR": "year",
        "MONTH": "month",
        "DAY": "day",
        "REP_DATE": "start_date",
        "CALC_HA": "area_burnt_ha",
        "CAUSE": "cause",
        "cid": "division_id",
    }
)

In [12]:
# Preview the joined and renamed frame: one row per fire with its division_id.
fire_in_subdivision_gdf

Unnamed: 0,year,month,day,start_date,area_burnt_ha,cause,geometry,division_id
0,2008,8,18,2008-08-18,2.228881,L,"POLYGON Z ((-115.87804 50.17170 0.00000, -115....",55
1,2008,8,18,2008-08-18,8.159434,L,"POLYGON Z ((-116.53969 50.80429 0.00000, -116....",55
2,2008,8,16,2008-08-16,5.643911,L,"POLYGON Z ((-118.06441 51.84440 0.00000, -118....",55
3,2008,7,10,2008-07-10,1.730790,L,"POLYGON Z ((-117.60696 49.01132 0.00000, -117....",55
4,2008,7,1,2008-07-01,1.346359,L,"POLYGON Z ((-117.95915 50.20801 0.00000, -117....",71
...,...,...,...,...,...,...,...,...
25592,2008,8,17,2008-08-17,2.689829,L,"POLYGON Z ((-115.66692 50.47834 0.00000, -115....",55
25593,2008,8,18,2008-08-18,0.451117,L,"POLYGON Z ((-116.35879 50.94432 0.00000, -116....",55
25594,2008,8,17,2008-08-17,16.384738,L,"POLYGON Z ((-115.68441 50.20491 0.00000, -115....",55
25595,2008,8,17,2008-08-17,0.687931,L,"POLYGON Z ((-115.68427 50.20604 0.00000, -115....",55


In [13]:
# Collapse rows sharing the same (division_id, start_date, area_burnt_ha,
# cause) key, keeping the first entry of every other column per key, then
# turn the key levels back into ordinary columns.
# NOTE(review): pandas groupby drops rows whose key columns are null by
# default - confirm that discarding such fires is intended.
fire_in_subdivision_gdf = (
    fire_in_subdivision_gdf
    .groupby(by=["division_id", "start_date", "area_burnt_ha", "cause"])
    .first()
    .reset_index()
)

In [14]:
# Remove rows that are exact duplicates across all columns, rebinding the
# name to the de-duplicated frame rather than mutating it in place.
fire_in_subdivision_gdf = fire_in_subdivision_gdf.drop_duplicates()

In [17]:
# Sanity check: confirm the frame is still a GeoDataFrame after the
# groupby / drop_duplicates steps, since to_postgis is called on it below.
type(fire_in_subdivision_gdf)

geopandas.geodataframe.GeoDataFrame

In [20]:
# Label the frame as EPSG:4326 - the same CRS requested when the inputs were
# read - before it is written out. set_crs only declares the CRS; it does not
# reproject coordinates.
fire_in_subdivision_gdf = fire_in_subdivision_gdf.set_crs(crs="EPSG:4326")

In [21]:
# Write the result to the output table, replacing the table if it already
# exists and leaving the DataFrame index out of it.
fire_in_subdivision_gdf.to_postgis(
    name=FIRE_SUBDIVISION_TABLE,
    con=engine,
    if_exists="replace",
    index=False,
)

In [27]:
# Add the composite primary key on the columns that were used as the grouping
# key when the frame was de-duplicated.
# engine.begin() runs the statement in a transaction that is committed when
# the block exits without error, and text() wraps the raw SQL string; together
# they make the DDL execute and persist on both SQLAlchemy 1.x and 2.x
# (2.x no longer accepts plain strings or autocommits).
with engine.begin() as con:
    con.execute(
        text(
            f'ALTER TABLE "{FIRE_SUBDIVISION_TABLE}" '
            'ADD PRIMARY KEY ("division_id","start_date","area_burnt_ha","cause");'
        )
    )

In [None]:
# # Disabled: this "overlaps" join took too long to run.
# fire_overlaps_subdivision_gdf = gpd.sjoin(
#     fire_not_in_sub_division,
#     division_gdf,
#     how="inner",
#     predicate="overlaps").drop("index_right", axis=1)

In [25]:
# Preview the de-duplicated frame; the grouping-key columns come first after
# the groupby / reset_index step.
fire_in_subdivision_gdf

Unnamed: 0,division_id,start_date,area_burnt_ha,cause,year,month,day,geometry
0,0,1960-05-10,6422.863612,L,1960,5,10,"POLYGON Z ((-136.74856 66.77644 0.00000, -136...."
1,0,1960-06-12,679.814663,L,1960,6,12,"POLYGON Z ((-136.63157 65.80181 0.00000, -136...."
2,0,1961-07-20,106.663293,L,1961,7,20,"POLYGON Z ((-126.54247 60.54400 0.00000, -126...."
3,0,1961-07-20,7583.407826,L,1961,7,20,"POLYGON Z ((-125.25890 60.55448 0.00000, -125...."
4,0,1961-07-20,22645.797871,L,1961,7,20,"MULTIPOLYGON Z (((-126.15643 60.48821 0.00000,..."
...,...,...,...,...,...,...,...,...
25015,73,2019-07-26,42.128584,L,2019,7,26,"POLYGON Z ((-103.21818 58.63746 0.00000, -103...."
25016,73,2019-07-27,68.698008,L,2019,7,27,"POLYGON Z ((-104.69448 56.97894 0.00000, -104...."
25017,73,2019-07-29,179.067213,L,2019,7,29,"POLYGON Z ((-104.02730 58.87546 0.00000, -104...."
25018,73,2020-06-28,133.992125,L,2020,6,28,"MULTIPOLYGON Z (((-106.59045 58.58776 0.00000,..."
