In [1]:
# ! pip install geopandas pandas sqlalchemy psycopg2-binary openpyxl geoalchemy2 python-dotenv

# Import Block 

In [2]:
import os

import geopandas as gpd
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, TIMESTAMP
from sqlalchemy.engine import URL

# Global Constants

In [3]:
# Path to the zipped fire data set that is read with geopandas further down.
# NOTE(review): "SHPAEFLIE" looks like a typo for "SHAPEFILE"; renaming it
# requires updating the cell that reads the file as well.
CNFDB_SHPAEFLIE_PATH = "../../data/fire/NFDB_poly.zip"
# Location of the .env file that supplies the database settings.
PATH_TO_DOT_ENV = "../../.env"

# Non-secret pieces of the database connection URL.
DATABASE_TYPE = "postgresql"
DATABASE_HOST = "localhost"

# Name of the table the fire data is written to.
# NOTE(review): upper-case table names normally have to be double-quoted in
# PostgreSQL queries ("F") — confirm this name is intentional.
CNFDB_TABLE_NAME = "F"

In [4]:
# Read the database settings from the .env file into the process environment.
load_dotenv(PATH_TO_DOT_ENV)

DATABASE_NAME = os.environ.get("DATABASE_NAME")
POSTGRES_USER = os.environ.get("POSTGRES_USER")
POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD")
POSTGRES_HOST_PORT = os.environ.get("POSTGRES_HOST_PORT")
# Not used by the connection URL built in this notebook, so it stays optional.
POSTGRES_CONTAINER_PORT = os.environ.get("POSTGRES_CONTAINER_PORT")

# Fail fast with a clear message: os.environ.get() returns None for a missing
# variable, and that None would otherwise end up inside the connection URL as
# the literal text "None" and only surface later as an obscure database error.
_missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("DATABASE_NAME", DATABASE_NAME),
        ("POSTGRES_USER", POSTGRES_USER),
        ("POSTGRES_PASSWORD", POSTGRES_PASSWORD),
        ("POSTGRES_HOST_PORT", POSTGRES_HOST_PORT),
    )
    if setting_value is None
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        f"{', '.join(_missing_settings)}. "
        f"Check {PATH_TO_DOT_ENV!r} or the process environment."
    )

In [5]:
# Build the connection URL from its components instead of an f-string so that
# special characters in the user name or password (e.g. "@", ":", "/", "%")
# are escaped correctly rather than corrupting the URL.
engine = create_engine(
    URL.create(
        drivername=DATABASE_TYPE,
        username=POSTGRES_USER,
        password=POSTGRES_PASSWORD,
        host=DATABASE_HOST,
        # The port arrives as text from the environment; an unset/empty value
        # falls back to the driver's default port.
        port=int(POSTGRES_HOST_PORT) if POSTGRES_HOST_PORT else None,
        database=DATABASE_NAME,
    )
)

In [6]:
# Column name -> SQL type overrides applied when the table is written:
# every date column is stored as a TIMESTAMP.
sql_dtypes = {
    date_column: TIMESTAMP
    for date_column in ("REP_DATE", "ACQ_DATE", "POLY_DATE")
}

# Process Fire Data to DB

In [7]:
%%time
# Load the fire data from the zipped data set and reproject it to
# EPSG:4326 (WGS 84 longitude/latitude) in a single chained expression.
fire_gdf = gpd.read_file(CNFDB_SHPAEFLIE_PATH).to_crs("EPSG:4326")

CPU times: user 1min 35s, sys: 1.31 s, total: 1min 36s
Wall time: 1min 36s


In [8]:
# Parse each date column (expected as YYYY-MM-DD) into pandas datetimes.
for date_column in ("REP_DATE", "ACQ_DATE", "POLY_DATE"):
    fire_gdf[date_column] = pd.to_datetime(fire_gdf[date_column], format="%Y-%m-%d")

In [9]:
# Keep only the lightning-caused fires, i.e. rows whose CAUSE code is "L".
fire_gdf = fire_gdf.loc[fire_gdf["CAUSE"].eq("L")]

In [10]:
# Remove fully duplicated rows.
# Reassign instead of using inplace=True: fire_gdf was produced by boolean
# filtering, and mutating such a frame in place is the classic trigger for
# pandas' chained-assignment (SettingWithCopy) pitfalls. Reassignment yields
# the same rows and keeps the step explicit.
fire_gdf = fire_gdf.drop_duplicates()

In [11]:
%%time
# Write the prepared fire records to the database table, replacing any
# existing table of the same name; the index is not stored and the date
# columns use the SQL types declared in sql_dtypes.
fire_gdf.to_postgis(
    CNFDB_TABLE_NAME,
    engine,
    if_exists="replace",
    index=False,
    dtype=sql_dtypes,
)

CPU times: user 48 s, sys: 2.89 s, total: 50.9 s
Wall time: 1min 33s
