In [1]:
import os
import pandas as pd
import geopandas as gpd
import pygeos as pg
import numpy as np
import tensorflow as tf
import sqlalchemy as sq
import calendar
from dotenv import load_dotenv
from IPython.display import clear_output
from matplotlib import pyplot as plt
from DataService import DataService

2022-11-30 06:37:56.567614: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-30 06:37:56.915904: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Notebook-wide pandas display settings: show at most 10 rows, print floats
# with one decimal place, and never truncate the column list.
pd.set_option('display.max_rows', 10)
pd.set_option('display.float_format', "{:.1f}".format)
pd.set_option('display.max_columns', None)

# Run the rest of the notebook from /tf so relative paths resolve there.
os.chdir('/tf')

# Database credentials come from the process environment; os.getenv yields
# None for any variable that is not set.
# NOTE(review): load_dotenv is imported but never called in the visible
# cells - confirm the variables are exported before the kernel starts.
PGUSER, PGPW, PGDB = (
    os.getenv(var_name)
    for var_name in ('POSTGRES_USER', 'POSTGRES_PW', 'POSTGRES_DB')
)

In [3]:
# Open two database connections with the same credentials: one used for
# reading tables (pull) and one used for writing results back (push).
db_credentials = (PGDB, PGUSER, PGPW)

pullService = DataService(*db_credentials)
db_pull_con = pullService.connect()

pushService = DataService(*db_credentials)
db_push_con = pushService.connect()

In [4]:
# Read the provincial boundaries table; later cells draw it as the outline
# layer underneath each check plot.
provinceTable = "ProvincialBoundaries"
query5 = f'SELECT * FROM public."{provinceTable}";'
dfProv = gpd.GeoDataFrame.from_postgis(sql=query5, con=db_pull_con)

In [5]:
# Read the fire/weather table into a plain (non-spatial) DataFrame.
fireWeatherTable = "FireWeather"
query1 = f'SELECT * FROM public."{fireWeatherTable}";'
dfFireWeather = pd.read_sql(sql=query1, con=db_pull_con)

In [6]:
# Read the water sources table as a GeoDataFrame.
waterTable = "WaterSources"
query2 = f'SELECT * FROM public."{waterTable}";'
dfWater = gpd.GeoDataFrame.from_postgis(sql=query2, con=db_pull_con)

# Visual sanity check: water sources in red on top of the provincial outlines.
outline_style = {'color': None, 'edgecolor': 'black', 'linewidth': 1, 'aspect': 1}
point_style = {'color': 'red', 'markersize': 0.1, 'aspect': 1}

f1, ax1 = plt.subplots(figsize=(19.20, 10.80))
dfProv.boundary.plot(ax=ax1, **outline_style)
dfWater.plot(ax=ax1, **point_style)


In [7]:
# Load the elevation points
elevationTable = "SKElevationPoints"
query3 = "SELECT * FROM public.\"{}\" ;".format(elevationTable)
dfElevation = gpd.GeoDataFrame.from_postgis(query3, db_pull_con)

# Drop rows where e_u_en is Feet
# (presumably the remaining 'e' values are all in metres - TODO confirm).
dfElevation = dfElevation[dfElevation['e_u_en'] != 'Feet']

# Reproject to CRS 3347. to_crs already tags the result with the new CRS, so
# no separate set_crs call is needed (the previous one discarded its return
# value and therefore had no effect).
dfElevation = dfElevation.to_crs(3347)

# NOTE: this replaces the very table the cell reads from, so the raw rows
# (including the 'Feet' rows) are gone from the database after the first run.
dfElevation.to_postgis(elevationTable, db_push_con, if_exists='replace', index=False)

# plot to check (dfElevation is already in CRS 3347, no reprojection needed)
f2, ax2 = plt.subplots(figsize=(19.20, 10.80))
dfProv.boundary.plot(ax=ax2, color=None, edgecolor='black', linewidth=1, aspect=1)
dfElevation.plot(ax=ax2, color='red', markersize=0.1, aspect=1)

In [8]:
# Read the fire centroid table as a GeoDataFrame.
centroids = "lgFireFiftyCentroids"
query4 = f'SELECT * FROM public."{centroids}";'
dfCent = gpd.GeoDataFrame.from_postgis(sql=query4, con=db_pull_con)

# Visual sanity check: centroids in red on top of the provincial outlines.
outline_style = {'color': None, 'edgecolor': 'black', 'linewidth': 1, 'aspect': 1}
point_style = {'color': 'red', 'markersize': 0.1, 'aspect': 1}

f3, ax3 = plt.subplots(figsize=(19.20, 10.80))
dfProv.boundary.plot(ax=ax3, **outline_style)
dfCent.plot(ax=ax3, **point_style)

In [10]:
# Keep only the centroids whose EntryID also exists in dfFireWeather.
# Copying AFTER the filter gives dfFire its own data, so the column writes
# below never touch (or warn about) dfCent.
dfFire = dfCent[dfCent['EntryID'].isin(dfFireWeather['EntryID'])].copy()

# For every fire centroid, record the elevation of the closest elevation
# point and the distance to the closest water source.
#
# The distances are held in temporary Series, so dfElevation and dfWater are
# left unchanged (no re-sorting, no extra 'DISTANCE' column), and taking the
# minimum directly avoids sorting the whole table once per fire.
for index, fire_geom in dfFire['geom'].items():
    elevation_dist = dfElevation['geom'].distance(fire_geom)
    # idxmin() gives the index label of the nearest point; this assumes the
    # labels of dfElevation are unique (they are for a frame read with the
    # default index and then row-filtered - verify if the loading changes).
    dfFire.at[index, 'ElevationM'] = dfElevation.loc[elevation_dist.idxmin(), 'e']

    water_dist = dfWater['geom'].distance(fire_geom)
    dfFire.at[index, 'Dist_To_Water'] = water_dist.min()



In [11]:
# Sanity check of dfFire: summary statistics, per-column non-null counts and
# the total number of missing cells, each printed on its own line(s).
print(
    dfFire.describe(),
    dfFire.count(),
    dfFire.isnull().sum().sum(),
    sep='\n',
)

       EntryID  ElevationM  Dist_To_Water
count    428.0       428.0          428.0
mean    3829.2       883.5         4370.1
std     3843.2       460.7         4712.6
min       33.0       334.0            0.0
25%      757.5       457.8         1039.4
50%     1736.0       787.5         2595.7
75%     8700.2      1280.0         5928.5
max    12248.0      1952.0        28555.3
EntryID          428
geom             428
ElevationM       428
Dist_To_Water    428
dtype: int64
0


In [12]:
# Left-join the per-fire elevation / water-distance columns (dfFire) onto the
# fire weather records, matching rows on EntryID.
dfMerged = pd.merge(dfFireWeather, dfFire, how='left', on='EntryID')

In [13]:
# Sanity check of the merged frame: summary statistics, per-column non-null
# counts and the total number of missing cells.
print(
    dfMerged.describe(),
    dfMerged.count(),
    dfMerged.isnull().sum().sum(),
    sep='\n',
)

       EntryID   YEAR  MONTH   DAY  SIZE_HA  size_ha_bin  OneYear  OneMonth  \
count    428.0  428.0  428.0 428.0    428.0        428.0    428.0     428.0   
mean    3829.2 2013.9    6.8  15.9   8936.9          1.7   2013.9       5.8   
std     3843.2    2.6    0.9   9.8  32731.6          1.1      2.6       0.9   
min       33.0 2010.0    5.0   1.0    200.7          0.0   2010.0       4.0   
25%      757.5 2012.0    6.0   6.0    651.8          1.0   2012.0       5.0   
50%     1736.0 2014.0    7.0  15.0   1880.7          2.0   2014.0       6.0   
75%     8700.2 2016.0    7.0  25.2   5092.8          3.0   2016.0       6.0   
max    12248.0 2020.0   10.0  31.0 491429.4          3.0   2020.0       9.0   

       OneMeanTemp  OneMinTemp  OneMaxTemp  OneMeanDewPoint  OneMinDewPoint  \
count        428.0       428.0       428.0            428.0           428.0   
mean          12.5        -2.8        27.3              3.6            -7.5   
std            4.7         6.6         3.2         

In [14]:
# Independent (deep) copy of the merged frame; copy() is deep by default.
dfFinal = dfMerged.copy()

In [15]:
# Wrap the frame as a GeoDataFrame with 'geom' as the geometry column, then
# write it to the FireWeatherWaterElev table, replacing any existing table.
dfFinal = gpd.GeoDataFrame(dfFinal, geometry='geom')
dfFinal.to_postgis(
    name='FireWeatherWaterElev',
    con=db_push_con,
    if_exists='replace',
    index=False,
)

In [16]:
# Build the variant of the merged frame without any of the 'One*' / 'Two*'
# weather columns. Each prefix carries the same 22 suffixes: Month, Year,
# TotalPrecip, MeanWindDirection, and Mean/Min/Max of six measurements.
weather_stats = ['Month', 'Year', 'TotalPrecip', 'MeanWindDirection'] + [
    f'{agg}{measure}'
    for measure in ('Temp', 'DewPoint', 'Humidity', 'Pressure', 'WindSpeed', 'WindChill')
    for agg in ('Mean', 'Min', 'Max')
]
weather_columns = [
    f'{period}{stat}' for period in ('One', 'Two') for stat in weather_stats
]

# drop() returns a new frame, so dfMerged itself is left untouched.
dfFireWaterElev = dfMerged.drop(columns=weather_columns)
                                      

In [17]:
# Sanity check of the reduced frame: summary statistics, per-column non-null
# counts and the total number of missing cells.
print(
    dfFireWaterElev.describe(),
    dfFireWaterElev.count(),
    dfFireWaterElev.isnull().sum().sum(),
    sep='\n',
)

       EntryID   YEAR  MONTH   DAY  SIZE_HA  size_ha_bin  ElevationM  \
count    428.0  428.0  428.0 428.0    428.0        428.0       428.0   
mean    3829.2 2013.9    6.8  15.9   8936.9          1.7       883.5   
std     3843.2    2.6    0.9   9.8  32731.6          1.1       460.7   
min       33.0 2010.0    5.0   1.0    200.7          0.0       334.0   
25%      757.5 2012.0    6.0   6.0    651.8          1.0       457.8   
50%     1736.0 2014.0    7.0  15.0   1880.7          2.0       787.5   
75%     8700.2 2016.0    7.0  25.2   5092.8          3.0      1280.0   
max    12248.0 2020.0   10.0  31.0 491429.4          3.0      1952.0   

       Dist_To_Water  
count          428.0  
mean          4370.1  
std           4712.6  
min              0.0  
25%           1039.4  
50%           2595.7  
75%           5928.5  
max          28555.3  
EntryID          428
FIRE_ID          428
FIRENAME         428
YEAR             428
MONTH            428
                ... 
size_ha_bin      4

In [19]:
# Wrap the reduced frame as a GeoDataFrame with 'geom' as the geometry column,
# then write it to the FireWaterElev table, replacing any existing table.
dfFireWaterElev = gpd.GeoDataFrame(dfFireWaterElev, geometry='geom')
dfFireWaterElev.to_postgis(
    name='FireWaterElev',
    con=db_push_con,
    if_exists='replace',
    index=False,
)