In [1]:
%%writefile ../../src/utils/dataset.py
from enum import Enum

# Define a dataset enum
class Dataset(Enum):
    """Datasets known to the project, each carrying a small settings dict.

    Every member value has the same keys:
        id: short string name of the dataset.
        index_col: column used as the DataFrame index when the dataset is read.
        is_geo: whether the dataset is read as geographic data.
        date_column: name of the date column, or None when there is none.
        data_columns: list of value columns, or None when there are none.
    """

    FIRE = dict(
        id='fire',
        index_col='division_id',
        is_geo=False,
        date_column='start_date',
        data_columns=['start_date', 'area_burnt_ha'],
    )

    SUBDIVISION = dict(
        id='subdivision',
        index_col='cid',
        is_geo=True,
        date_column=None,
        data_columns=None,
    )

    WEATHER = dict(
        id='weather',
        index_col='division_id',
        is_geo=False,
        date_column='date',
        # NOTE(review): a single empty string looks like a placeholder - confirm.
        data_columns=[''],
    )

    LIGHTNING = dict(
        id='lightning',
        index_col='division_id',
        is_geo=False,
        date_column='timestamp',
        data_columns=[
            'multiplicity_sum',
            'multiplicity_min',
            'multiplicity_max',
            'multiplicity_mean',
            'event_strength_kiloamperes_mean',
            'event_strength_kiloamperes_min',
            'event_strength_kiloamperes_max',
        ],
    )

Overwriting ../../src/utils/dataset.py


In [2]:
%%writefile ../../src/utils/generate_subdivision.py
from sqlalchemy.engine import URL
from sqlalchemy import create_engine
from pandas import read_sql, DataFrame
from geopandas import read_postgis, GeoDataFrame
from utils.dataset import Dataset


class GenSubdivision():
    """Reads one dataset from the database and groups its rows by subdivision.

    The dataset to load is chosen with ``d_full``; the subdivision geometry
    table can be fetched separately with ``get_subdivision_dataset``.
    """

    def __init__(
            self,
            d_full:Dataset,
            s:Dataset = Dataset.SUBDIVISION,
            db_url:URL = None
    ) -> None:
        """Create the database engine and remember which datasets to use.

        Args:
            d_full (Dataset): dataset whose rows are grouped by
                ``gen_subdivisions``.
            s (Dataset, optional): dataset describing the subdivisions; its
                ``index_col`` is used by ``get_subdivision_dataset``.
                Defaults to ``Dataset.SUBDIVISION``.
            db_url (URL, optional): database URL handed directly to
                ``create_engine``.
        """
        self.engine = create_engine(db_url)
        self.d_full = d_full
        # Keep the subdivision dataset instead of silently dropping the
        # argument, so get_subdivision_dataset honours what the caller passed.
        self.s = s

    def __get_subdivion_data_query(self) -> str:
        """Return the query selecting every row of table "S"."""
        return """SELECT * FROM "S";"""

    def __get_lightning_data_query(self) -> str:
        """Return the query selecting every row of table "L_s"."""
        return """SELECT * FROM "L_s";"""

    def __get_fire_data_query(self) -> str:
        """Return the query for fire rows of table "F_s" with cause 'L'.

        Only the division id, start date and burnt area are selected.
        """
        # NOTE(review): cause = 'L' presumably means lightning-caused - confirm.
        query = """
            SELECT
                fs.division_id,
                fs.start_date,
                fs.area_burnt_ha
            FROM 
                "F_s" fs
            WHERE
                fs.cause = 'L'
        """
        return query

    def __get_weather_data_query(self) -> str:
        """Return the query selecting every row of table "W_ms"."""
        return """select * from "W_ms";"""

    def __read_geodata(
            self, 
            query:str, 
            index_col:str = None, 
            geom_col:str = 'geometry',
            crs:str = "EPSG:4326"
    ) -> GeoDataFrame:
        """Run ``query`` through ``read_postgis`` on this instance's engine.

        Args:
            query (str): SQL to execute.
            index_col (str, optional): column to use as the index.
            geom_col (str, optional): name of the geometry column.
            crs (str, optional): coordinate reference system passed to
                ``read_postgis``.

        Returns:
            GeoDataFrame: the query result.
        """
        return read_postgis(
            sql = query,
            con = self.engine,
            geom_col = geom_col,
            index_col = index_col,
            crs = crs
        )

    def __read_data(
            self,
            query:str,
            index_col:str = None,
    ) -> DataFrame:
        """Run ``query`` through ``read_sql`` on this instance's engine.

        Args:
            query (str): SQL to execute.
            index_col (str, optional): column to use as the index.

        Returns:
            DataFrame: the query result.
        """
        return read_sql(
            sql = query,
            con = self.engine,
            index_col = index_col
        )

    def get_subdivision_dataset(self) -> GeoDataFrame:
        """Read the subdivision table as geographic data.

        Returns:
            GeoDataFrame: rows of table "S", indexed by the ``index_col`` of
            the subdivision dataset given to the constructor.
        """
        subdivision_data_query = self.__get_subdivion_data_query()
        return self.__read_geodata(
            subdivision_data_query,
            index_col = self.s.value['index_col']
        )

    def __get_data_query(self) -> str:
        """ Generates the query to get the appropriate data

        Raises:
            ValueError: The dataset type is invalid.

        Returns:
            str: dataset query
        """
        if self.d_full == Dataset.LIGHTNING:
            return self.__get_lightning_data_query()
        if self.d_full == Dataset.WEATHER:
            return self.__get_weather_data_query()
        if self.d_full == Dataset.FIRE:
            return self.__get_fire_data_query()
        # Any other value, including Dataset.SUBDIVISION, has no query here.
        raise ValueError("Invalid return dataset type!!!")

    def gen_subdivisions(self):
        """Load the ``d_full`` dataset and group its rows by its index column.

        Raises:
            ValueError: ``d_full`` has no data query (see ``__get_data_query``).

        Returns:
            The pandas groupby object produced by grouping the loaded frame
            on the dataset's ``index_col``; iterating it yields
            ``(index value, frame)`` pairs.
        """
        # Build the query first so an unsupported dataset fails before any
        # database access.
        data_query = self.__get_data_query()
        dataset_spec = self.d_full.value
        index_col = dataset_spec['index_col']

        # Geographic datasets go through read_postgis, the rest through read_sql.
        reader = self.__read_geodata if dataset_spec['is_geo'] else self.__read_data
        data = reader(data_query, index_col = index_col)

        # index_col is the frame's index here; groupby accepts its name in `by`.
        return data.groupby(by = index_col)

Overwriting ../../src/utils/generate_subdivision.py


In [3]:
import os
from dotenv import load_dotenv

from sqlalchemy.engine import URL

import sys
# Make the project's src/ directory importable so that the `utils` modules
# written by the %%writefile cells above can be imported below.
src_path = "../../src/"
sys.path.append(src_path)
from utils.generate_subdivision import GenSubdivision
from utils.dataset import Dataset

In [4]:
# Database connection settings kept in the notebook itself.
DATABASE_TYPE = "postgresql"
DATABASE_HOST = "localhost"

# Path of the .env file, relative to this notebook.
PATH_TO_DOT_ENV = "../../.env"


In [5]:
# Load the .env file so its entries are available through the environment.
load_dotenv(PATH_TO_DOT_ENV)

# Each setting is None when the variable is not present in the environment.
DATABASE_NAME = os.getenv("DATABASE_NAME")
POSTGRES_USER = os.getenv("POSTGRES_USER")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")
POSTGRES_HOST_PORT = os.getenv("POSTGRES_HOST_PORT")
POSTGRES_CONTAINER_PORT = os.getenv("POSTGRES_CONTAINER_PORT")

In [6]:
# Assemble the connection URL from the notebook constants and the values
# read from the environment.
DATABASE_URL = URL.create(
    drivername=DATABASE_TYPE,
    username=POSTGRES_USER,
    password=POSTGRES_PASSWORD,  # plain (unescaped) text
    host=DATABASE_HOST,
    port=POSTGRES_HOST_PORT,
    database=DATABASE_NAME,
)

In [7]:
# Subdivision whose records are pulled out of each grouped dataset.
s_id = 71

# get_group raises KeyError when the subdivision is missing, instead of
# silently leaving the last iterated group in the variable as a
# for/break search would.
fire_generator = GenSubdivision(
    d_full = Dataset.FIRE,
    db_url = DATABASE_URL
)
f_map = fire_generator.gen_subdivisions()
f_data = f_map.get_group(s_id)

w_generator = GenSubdivision(
    d_full = Dataset.WEATHER,
    db_url = DATABASE_URL
)
w_map = w_generator.gen_subdivisions()
w_data = w_map.get_group(s_id)

l_generator = GenSubdivision(
    d_full = Dataset.LIGHTNING,
    db_url = DATABASE_URL
)
l_map = l_generator.gen_subdivisions()
l_data = l_map.get_group(s_id)

In [8]:
# Fire rows of the subdivision selected in the previous cell.
f_data

Unnamed: 0_level_0,start_date,area_burnt_ha
division_id,Unnamed: 1_level_1,Unnamed: 2_level_1
71,1919-07-01,1094.543856
71,1919-07-25,517.811840
71,1919-07-30,1504.863833
71,1919-08-01,7.385316
71,1919-08-01,78.171012
...,...,...
71,2020-08-17,34.179079
71,2020-08-17,380.653945
71,2020-08-18,3.529941
71,2020-08-18,6.553449


In [9]:
# Lightning rows of the subdivision selected in the loading cell.
l_data

Unnamed: 0_level_0,timestamp,multiplicity_sum,multiplicity_min,multiplicity_max,multiplicity_mean,event_strength_kiloamperes_mean,event_strength_kiloamperes_min,event_strength_kiloamperes_max
division_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
71,1999-02-01,4,1.0,1.0,1.000000,39.575000,22.6,70.4
71,1999-02-04,1,1.0,1.0,1.000000,80.300000,80.3,80.3
71,1999-02-05,4,1.0,1.0,1.000000,86.500000,24.9,191.1
71,1999-02-06,7,1.0,2.0,1.166667,82.033333,39.1,120.2
71,1999-02-09,2,1.0,1.0,1.000000,39.000000,17.6,60.4
...,...,...,...,...,...,...,...,...
71,2022-06-20,4,1.0,1.0,1.000000,35.825000,25.8,60.1
71,2022-06-22,20,1.0,2.0,1.052632,23.747368,10.2,56.6
71,2022-06-23,21,1.0,2.0,1.105263,34.752632,12.1,107.8
71,2022-06-28,271,1.0,3.0,1.101626,26.507317,2.7,125.2


In [10]:
# Weather rows of the subdivision selected in the loading cell.
# NOTE(review): geometry is displayed as a raw hex string, presumably because
# the WEATHER dataset has is_geo=False and is read with read_sql - confirm
# this is intended.
w_data

Unnamed: 0_level_0,climate_ID,first_yr,last_yr,geometry
division_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
71,1100031,1998,2017,0101000020E6100000D7A3703D0A975EC0A4703D0AD783...
71,1100119,1998,2017,0101000020E6100000713D0AD7A3705EC01F85EB51B89E...
71,1020590,2000,2017,0101000020E61000000AD7A3703D0A5FC0CDCCCCCCCCAC...
71,1160H99,1998,2017,0101000020E6100000C3F5285C8FD25DC0713D0AD7A310...
71,1101300,2005,2017,0101000020E6100000D7A3703D0AC75EC052B81E85EB11...
71,1161661,1998,2013,0101000020E61000000000000000605EC08FC2F5285C8F...
71,1021830,1998,2017,0101000020E61000009A99999999395FC05C8FC2F528DC...
71,1022795,2001,2017,0101000020E61000008FC2F5285C7F5FC09A9999999939...
71,1113542,2005,2017,0101000020E61000000000000000605EC08FC2F5285CAF...
71,1163781,1998,2017,0101000020E6100000CDCCCCCCCC1C5EC09A9999999959...
