In [14]:
# One-time environment setup: uncomment the line below to install the packages this notebook uses.
# (Inside Jupyter, `%pip install ...` is preferable to `! pip install ...` because it targets the kernel's environment.)
# ! pip install geopandas pandas sqlalchemy psycopg2-binary matplotlib geoalchemy2 python-dotenv

# Import Block 

In [1]:
import geopandas as gpd
import pandas as pd
from sqlalchemy import TIMESTAMP

import os
from dotenv import load_dotenv

import sys
sys.path.append("../src")

from utils.Database import Database

# Load Environment

In [2]:
# Load settings from the local .env file into the process environment.
load_dotenv(".env")

# TABLE_FIRE: name of the database table the fire data is written to / read from.
# CNFDB_PATH: path of the fire dataset that is read with geopandas.
TABLE_FIRE = os.getenv("TABLE_FIRE")
CNFDB_PATH = os.getenv("CNFDB_PATH")

# Fail fast on missing configuration: os.getenv returns None for unset variables,
# which would otherwise only show up later as a failed file read or as the literal
# table name "None" inside the f-string SQL statements.
_missing_env = [
    name
    for name, value in (("TABLE_FIRE", TABLE_FIRE), ("CNFDB_PATH", CNFDB_PATH))
    if not value
]
if _missing_env:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing_env)} "
        "(expected in .env or the process environment)"
    )

# Constants

In [3]:
# Column name -> SQLAlchemy type mapping, passed as `dtypes` when the
# GeoDataFrame is sent to the database.
SQL_DTYPES = dict(REP_DATE=TIMESTAMP)

# Establish Database Connection

In [4]:
# Instantiate the project's Database helper (utils.Database). Per the cell output,
# construction reports an established connection and the engine URL.
# NOTE(review): connection settings are presumably read from the environment — confirm in utils/Database.
db = Database()

Connection Established!!!
	Engine(postgresql://wireaiadmin:***@localhost:5434/weather_db)


# Read Data

In [5]:
%%time
# read fire data
fire_gdf = gpd.read_file(CNFDB_PATH)
fire_gdf = fire_gdf.to_crs("EPSG:4326")

CPU times: user 25.3 s, sys: 1.06 s, total: 26.4 s
Wall time: 26.4 s


# Data Pre-Processing

In [6]:
# Drop records whose report date is the "0000/00/00" placeholder (i.e. no date).
# .copy() makes the filtered result an independent frame, so the column drops and
# column assignments in the following cells operate on it directly and cannot
# trigger pandas' SettingWithCopyWarning.
fire_gdf = fire_gdf[fire_gdf['REP_DATE'] != "0000/00/00"].copy()

In [7]:
# Drop columns that are not needed for the database table.
# The result is reassigned rather than mutated with inplace=True, and
# errors="ignore" skips columns that are already gone, so this cell can be
# re-run without raising KeyError.
fire_gdf = fire_gdf.drop(
    columns=[
        'SRC_AGENCY',
        'FIRE_ID',
        'FIRENAME',
        'SIZE_HA',
        'DECADE',
        'ACQ_DATE',
        'POLY_DATE',
        'MORE_INFO',
        'CFS_REF_ID',
        'CFS_NOTE1',
        'CFS_NOTE2',
        'AG_SRCFILE',
    ],
    errors="ignore",
)

In [8]:
# Parse the 'YYYY/MM/DD' report-date strings into pandas datetimes,
# then write the parsed series back over the original column
rep_date_parsed = pd.to_datetime(fire_gdf['REP_DATE'], format='%Y/%m/%d')
fire_gdf['REP_DATE'] = rep_date_parsed

In [9]:
# Remove rows that are duplicated across every column, printing the row count
# before and after. The de-duplicated frame is reassigned instead of using
# inplace=True, so the frame is never mutated in place.
print(f"Size before drop of duplicated: {len(fire_gdf)}")
fire_gdf = fire_gdf.drop_duplicates()
print(f"Size after drop of duplicated: {len(fire_gdf)}")

Size before drop of duplicated: 47545
Size after drop of duplicated: 47543


In [10]:
%%time
# send to DB

db.send_gdf_to_db(
    gdf = fire_gdf,
    table_name = TABLE_FIRE,
    dtypes = SQL_DTYPES
)

CPU times: user 1min 2s, sys: 4.28 s, total: 1min 6s
Wall time: 2min 18s


In [14]:
# Add a composite primary key on (REP_DATE, CALC_HA, CAUSE) for faster retrieval.
# NOTE(review): the pre-processing cells only drop rows that are identical in every
# column; nothing enforces uniqueness or non-null values on these three columns, so
# this statement presumably fails if two fires share all three — confirm the key choice.
# NOTE(review): re-running this cell on a table that already has a primary key will
# likely raise a database error — verify before using Run All on an existing table.

primary_key_statement = f"""ALTER TABLE "{TABLE_FIRE}" ADD PRIMARY KEY ( "REP_DATE", "CALC_HA", "CAUSE" );"""
db.execute_sql(primary_key_statement)

Execution started --> ALTER TABLE "fire" ADD PRIMARY KEY ( "REP_DATE", "CALC_HA", "CAUSE" );
Exectution completed --> ALTER TABLE "fire" ADD PRIMARY KEY ( "REP_DATE", "CALC_HA", "CAUSE" );


# Test Read Data

In [15]:
# Round-trip check: read the whole fire table back from the database as a
# GeoDataFrame (geometry taken from the "geometry" column) and display it
select_all_sql = f"""SELECT * from "{TABLE_FIRE}"; """
fire_gdf = gpd.read_postgis(sql=select_all_sql, con=db.connection, geom_col="geometry")

fire_gdf

Unnamed: 0,YEAR,MONTH,DAY,REP_DATE,DATE_TYPE,OUT_DATE,CALC_HA,CAUSE,MAP_SOURCE,SOURCE_KEY,MAP_METHOD,WATER_REM,UNBURN_REM,SRC_AGY2,geometry
0,2004,6,23,2004-06-23,Report date,0000/00/00,1370.507344,L,Satellite imagery,LANDSAT5,digitized,,,BC,"POLYGON Z ((-124.6106 53.08084 0, -124.6099 53..."
1,2004,6,23,2004-06-23,Report date,0000/00/00,520.796287,L,Satellite imagery,LANDSAT5,digitized,,,BC,"POLYGON Z ((-124.48262 53.06162 0, -124.4815 5..."
2,2004,6,20,2004-06-20,Report date,0000/00/00,268.290572,L,Satellite imagery,LANDSAT5,digitized,,,BC,"POLYGON Z ((-125.1276 52.13023 0, -125.12726 5..."
3,2004,6,21,2004-06-21,Report date,0000/00/00,20506.415129,L,Satellite imagery,LANDSAT5,Modified from Protection,,,BC,"POLYGON Z ((-125.75419 52.27968 0, -125.75294 ..."
4,2004,6,22,2004-06-22,Report date,0000/00/00,2408.587142,L,Satellite imagery,LANDSAT5,digitized,,,BC,"POLYGON Z ((-126.26477 52.9988 0, -126.26379 5..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47538,1985,7,29,1985-07-29,,0000/00/00,42.102471,L,NWT legacy,,DIGI SCENE: L5_1985_Sept9_5317,,,PC,"POLYGON Z ((-126.12909 61.15804 0, -126.12953 ..."
47539,1984,8,9,1984-08-09,,0000/00/00,256.750436,L,NWT / Landsat,,DIGI SCENE: L5_1984_Sept13_5417,,,PC,"POLYGON Z ((-126.7304 60.91676 0, -126.72941 6..."
47540,1984,6,26,1984-06-26,,0000/00/00,45.481300,U,Satellite imagery,Landsat,DIGI SCENE: L5_1984_July26_5616,,,PC,"POLYGON Z ((-129.60079 62.89984 0, -129.60147 ..."
47541,1992,7,1,1992-07-01,,0000/00/00,654.724359,L,NWT legacy,,DIGI SCENE: L5_1993_Aug19_5616,,,PC,"MULTIPOLYGON Z (((-128.73015 62.52788 0, -128...."
