In [1]:

# %pip install geopandas
# %pip install shapely
# %pip install pygeos
# %pip install tensorflow-cpu
# %pip install tensorflow-data-validation
# %pip install tensorflow-transform
# %pip install tensorflow-model-analysis
# %pip install scikit-learn
# %pip install scipy
# %pip install matplotlib
# %pip install python-dotenv
# %pip install seaborn

In [2]:
import os
import pandas as pd
import geopandas as gpd
import pygeos as pg
import numpy as np
import tensorflow as tf
import tensorflow_data_validation as tfdv
import sklearn as sk
import scipy as sp
import seaborn as sns
from IPython.display import clear_output
from matplotlib import pyplot as plt
from shapely import wkt


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [3]:
# Display settings: one decimal place for floats, and no truncation of
# columns or rows when a frame is rendered.
pd.set_option('display.float_format', "{:.1f}".format)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# The data files are opened with paths relative to the project root ("Data/..."),
# so the working directory has to point there. Set the MLWFF_PROJECT_DIR
# environment variable to run on another machine; the original location is
# kept as the fallback so existing setups behave exactly as before.
PROJECT_DIR = os.environ.get(
    'MLWFF_PROJECT_DIR',
    'F:\\Uni Files\\4710\\4710 Project\\MLweatherForestFire',
)
os.chdir(PROJECT_DIR)

In [4]:
def getGPDfromPD(df: pd.DataFrame, geomCol: str, crs: str = "EPSG:3978") -> gpd.GeoDataFrame:
    """
    Convert a pandas dataframe with a WKT text column to a geopandas dataframe.

    A column named 'geom' is renamed to 'geometry' before conversion. If
    ``geomCol`` is 'geom', the renamed column is used, so either name may be
    passed for such a frame. The input dataframe is not modified; the
    conversion is done on a copy.

    :param df: pandas dataframe
    :param geomCol: name of the geometry column (holding WKT strings)
    :param crs: coordinate reference system assigned to the result
    :return: geopandas dataframe whose geometry column is the parsed ``geomCol``
    :raises KeyError: if the geometry column is not present in ``df``
    """
    if 'geom' in df.columns:
        # rename() without inplace returns a new frame, so the caller's
        # dataframe keeps its original column names and contents.
        df = df.rename(columns={'geom': 'geometry'})
        if geomCol == 'geom':
            # The requested column was just renamed; follow it instead of
            # failing with a KeyError on the old name.
            geomCol = 'geometry'
    else:
        df = df.copy()

    df[geomCol] = df[geomCol].apply(wkt.loads)
    return gpd.GeoDataFrame(df, geometry=geomCol, crs=crs)

In [5]:
# Provincial boundaries: read the CSV, decode the WKT text in 'geom' and wrap
# the result as a GeoDataFrame in EPSG:3347.
provinces = "Data/GEOProvincialBoundaries.csv"
dfProvinces = pd.read_csv(provinces).assign(
    geom=lambda frame: frame['geom'].apply(wkt.loads)
)
gdfProvinces = gpd.GeoDataFrame(dfProvinces, geometry='geom', crs="EPSG:3347")
# Only the GeoDataFrame is kept; the temporaries are removed from the namespace.
del dfProvinces, provinces

In [6]:
# Fire records: read the CSV, decode the WKT text in 'geom' and wrap the
# result as a GeoDataFrame in EPSG:3347.
fireTable = "Data/GEOSKFiresAfter2000centroids.csv"
dfFire = pd.read_csv(fireTable).assign(
    geom=lambda frame: frame['geom'].apply(wkt.loads)
)
gdfFire = gpd.GeoDataFrame(dfFire, geometry='geom', crs="EPSG:3347")
# Only the GeoDataFrame is kept; the temporaries are removed from the namespace.
del dfFire, fireTable

In [None]:
# Sanity check: draw the fire points on top of the provincial boundaries.
# (A second copy of this plot used `gdfCentroids`, a name that is never
# defined in this notebook; it drew the same layers and has been folded in.)
fig, ax = plt.subplots(figsize=(20, 20))
gdfProvinces.plot(ax=ax, color='white', edgecolor='black')
gdfFire.plot(ax=ax, color='red', markersize=1)
ax.set_title('Fire locations over provincial boundaries');


In [8]:
# Water sources: read the CSV, decode the WKT text in 'geom' and wrap the
# result as a GeoDataFrame in EPSG:3347.
waterTable = "Data/GEOWaterSources.csv"
dfWater = pd.read_csv(waterTable).assign(
    geom=lambda frame: frame['geom'].apply(wkt.loads)
)
gdfWater = gpd.GeoDataFrame(dfWater, geometry='geom', crs="EPSG:3347")
# Only the GeoDataFrame is kept; the temporaries are removed from the namespace.
del dfWater, waterTable

# plot to check: water sources drawn over the provincial boundary outlines.
# Uses the GeoDataFrames that actually exist at this point (`gdfProvinces`,
# `gdfWater`); `dfProv` is never defined and `dfWater` has been deleted.
f1, ax1 = plt.subplots(figsize=(19.20, 10.80))
gdfProvinces.boundary.plot(ax=ax1, color=None, edgecolor='black', linewidth=1, aspect=1)
gdfWater.plot(ax=ax1, color='red', markersize=0.1, aspect=1);


In [9]:
# Elevation points: read the CSV, decode the WKT text in 'geom' and wrap the
# result as a GeoDataFrame in EPSG:3347.
elevationTable = "Data/GEOSKElevationPoints.csv"
dfElevation = pd.read_csv(elevationTable).assign(
    geom=lambda frame: frame['geom'].apply(wkt.loads)
)
gdfElevation = gpd.GeoDataFrame(dfElevation, geometry='geom', crs="EPSG:3347")
# Only the GeoDataFrame is kept; the temporaries are removed from the namespace.
del dfElevation, elevationTable

# Keep only the rows whose unit column 'e_u_en' is not 'Feet'.
notFeet = gdfElevation['e_u_en'].ne('Feet')
gdfElevation = gdfElevation[notFeet]
del notFeet

In [10]:
# For every fire, store the 'e' value of the closest elevation point and the
# distance to the closest water source.
#
# Only the minimum distance is needed, so it is taken directly with
# idxmin()/min() instead of sorting the whole reference table once per fire.
# This also leaves gdfElevation and gdfWater unchanged (no scratch 'DISTANCE'
# column, no reordering), so re-running the cell gives the same result.
elevationGeoms = gdfElevation['geom']
waterGeoms = gdfWater['geom']

for index, fireGeom in gdfFire['geom'].items():
    # Distance from this fire to every elevation point; idxmin() gives the
    # index label of the nearest one.
    nearestElevation = elevationGeoms.distance(fireGeom).idxmin()
    gdfFire.at[index, 'ElevationM'] = gdfElevation.at[nearestElevation, 'e']

    # Smallest distance from this fire to any water geometry.
    gdfFire.at[index, 'Dist_To_Water'] = waterGeoms.distance(fireGeom).min()


In [11]:
# Preview the first five fire rows, including the ElevationM and Dist_To_Water columns.
gdfFire.head(n=5)

Unnamed: 0,EntryID,FIRE_ID,FIRENAME,YEAR,MONTH,DAY,REP_DATE,SIZE_HA,size_ha_bin,geom,index_right,provID,ElevationM,Dist_To_Water
0,631,L81022,08LA-SIMMER,2008,6,6,2008-06-06,34665.8,3,POINT (5379014.920 2318191.017),7.0,SK,541.0,6094.2
1,638,L83027,08SE-ROCKY,2008,6,22,2008-06-22,245.9,0,POINT (5439865.094 2383987.264),7.0,SK,463.0,8399.1
2,639,L83020,08SE-MARGARET,2008,6,5,2008-06-05,4420.9,2,POINT (5505362.854 2401492.903),7.0,SK,436.0,4383.0
3,640,L83033,08SE-RYAN,2008,6,28,2008-06-28,5145.9,3,POINT (5603376.708 2394001.361),7.0,SK,348.0,1294.8
4,641,L82091,08PN-MIROND,2008,6,21,2008-06-21,90178.4,3,POINT (5529848.118 2176596.410),7.0,SK,1273.0,0.0


In [12]:
# save to csv GEOSKFireWaterElev.csv
# gdfFire.to_csv('Data/GEOSKFireWaterElev.csv', index=False)