<a href="https://colab.research.google.com/github/37stu37/FFE/blob/master/FFE_numba_Dask.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
%%time 
%%capture
!apt update
!apt upgrade
!apt install gdal-bin python-gdal python3-gdal 
# Install rtree - Geopandas requirment
!apt install python3-rtree 
# Install Geopandas
!pip install git+git://github.com/geopandas/geopandas.git
# Install descartes - Geopandas requirment
!pip install descartes 
!pip install memory_profiler

CPU times: user 2.95 s, sys: 663 ms, total: 3.62 s
Wall time: 26.3 s


In [0]:
# Load the Drive helper and mount
from google.colab import drive
# NOTE(review): Colab line magic selecting TensorFlow 2.x; none of the cells
# visible in this notebook import TensorFlow - confirm it is still needed.
%tensorflow_version 2.x
# This will prompt for authorization.
# The input/output paths configured later in the notebook live under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
%%time
import datetime
import glob
import os
import shutil
from math import sqrt
from sys import getsizeof

import bokeh
import dask
import dask.array as da
import dask.dataframe as dd
import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from dask.diagnostics import ProgressBar
from dask.distributed import Client
from numba import jit
from scipy.spatial import distance
from shapely.geometry import box
from shapely.geometry import shape
from shapely.geometry import Point
%matplotlib inline
%load_ext memory_profiler

pd.options.mode.chained_assignment = None  # default='warn'

The memory_profiler extension is already loaded. To reload it, use:
  %reload_ext memory_profiler
CPU times: user 44.7 ms, sys: 4.13 ms, total: 48.8 ms
Wall time: 47.6 ms


In [0]:
# Start a Dask distributed client. processes=False is passed, and the summary
# displayed below reports an inproc:// scheduler with a single worker.
client = Client(processes=False)
# Bare expression: renders the client summary as the cell output.
client

Failed to start diagnostics server on port 8787. [Errno 99] Cannot assign requested address
Could not launch service 'bokeh' on port 8787. Got the following message:

[Errno 99] Cannot assign requested address
  self.scheduler.start(scheduler_address)


0,1
Client  Scheduler: inproc://172.28.0.2/143/28,Cluster  Workers: 1  Cores: 4  Memory: 27.40 GB


Set up the paths to the input data and to the output directory

In [0]:
# Input directory: wind_scenario() reads its CSV file from here.
path = '/content/drive/My Drive/05_Sync/FFE/FireNetwork/00_input'
# Output directory: per-step and per-scenario parquet files are written here,
# and the edge list is loaded from its 'dask_edge_list' sub-folder.
path_output = '/content/drive/My Drive/05_Sync/FFE/FireNetwork/00_output'

# List the directory contents as a quick check that the Drive mount worked.
!ls "/content/drive/My Drive/05_Sync/FFE/FireNetwork/00_input"
!ls '/content/drive/My Drive/05_Sync/FFE/FireNetwork/00_output/dask_edge_list'
# !ls '/content/drive/My Drive/05_Sync/FFE/FireNetwork/00_output'

buildings_raw.cpg	buildings_raw_pts.shx
buildings_raw.dbf	buildings_raw.qpj
buildings_raw.prj	buildings_raw.shp
buildings_raw_pts.cpg	buildings_raw.shx
buildings_raw_pts.dbf	GD_wind.csv
buildings_raw_pts.mshp	outputs_centroids_allpga_1000GMFs
buildings_raw_pts.prj	outputs_centroids_allpga_1000GMFsPERCENTILES
buildings_raw_pts.shp	source_target.csv
edge_data.parquet


**Functions**


---



In [0]:
def clean_up_file(path, prefix):
    """Delete every file or directory in ``path`` whose name matches ``prefix``.

    Parameters
    ----------
    path : str
        Directory to clean.
    prefix : str
        Glob pattern (e.g. ``"output*"``) joined onto ``path``.

    Notes
    -----
    Directories are removed recursively and everything else is unlinked.
    The file type is checked explicitly instead of relying on a bare
    ``except`` so that genuine errors (permissions, missing imports, ...)
    are reported rather than silently redirected to ``os.remove``.
    """
    for target in glob.glob(os.path.join(path, prefix)):
        # A symlink to a directory must be unlinked, not recursed into.
        if os.path.isdir(target) and not os.path.islink(target):
            shutil.rmtree(target)
        else:
            os.remove(target)

In [0]:
# wind scenario
def wind_scenario(file_name):
    """Pick one random row of the wind table and derive a bearing window.

    The CSV ``file_name`` is read from the module-level ``path`` directory.
    A row is drawn with ``np.random.randint`` and three values are read by
    position: column 1 (returned unchanged as the third element), column 2
    (read but not used) and column 3 (the bearing the window is built on).

    Returns
    -------
    tuple
        ``(wind_bearing_max, wind_bearing_min, d)`` where the window is
        ``bearing +/- 45`` with these special cases:

        * bearing == 360 -> upper bound is 45
        * bearing <= 0   -> lower bound is 0
        * bearing == 999 -> window is (999, 0)
    """
    table = pd.read_csv(os.path.join(path, file_name))
    pick = np.random.randint(0, table.shape[0])
    speed = table.iloc[pick, 2]  # read but not used by this function
    reach = table.iloc[pick, 1]
    bearing = table.iloc[pick, 3]

    if bearing == 999:
        # 999 gets a fixed window spanning 0..999
        upper, lower = 999, 0
    else:
        upper = 45 if bearing == 360 else bearing + 45
        lower = 0 if bearing <= 0 else bearing - 45  # should not be necessary

    return upper, lower, reach

In [0]:
# create ignitions
def ignitions(edges, scenario):
    """Randomly ignite edges according to their ``IgnProb_bl`` value.

    A uniform random number in [0, 1) is stored for every row in a new
    ``rng`` column of ``edges`` (the caller's frame is modified), and the
    rows whose draw falls below ``IgnProb_bl`` are selected.

    Parameters
    ----------
    edges : pandas.DataFrame
        Edge list with at least an ``IgnProb_bl`` column.
    scenario : int
        Scenario identifier written to the ``scenario`` column.

    Returns
    -------
    pandas.DataFrame
        The ignited rows with ``step`` set to 0 and ``scenario`` set.
    """
    n_edges = len(edges)
    # one random draw per edge, stored on the input frame
    edges['rng'] = np.random.uniform(0, 1, size=n_edges)
    ignited = edges.rng < edges.IgnProb_bl
    return edges.loc[ignited].assign(step=0, scenario=scenario)

In [0]:
# conditions of fire propagation
def conditions_spread(fires,burn,wind_bearing_max,wind_bearing_min,wind_distance,
                      scenario, step):
    """Filter the active fire edges down to those that actually spread.

    An edge spreads when all of the following hold:

    * its ``distance`` is smaller than ``wind_distance``;
    * its ``bearing`` lies strictly between ``wind_bearing_min`` and
      ``wind_bearing_max``;
    * its ``target`` is not already in ``burn``.

    When ``wind_bearing_min`` is greater than ``wind_bearing_max`` the
    window is treated as wrapping around (e.g. min 315 / max 45), so a
    bearing matches if it is above the minimum OR below the maximum.

    Parameters
    ----------
    fires : pandas.DataFrame
        Active fire edges; needs ``source``, ``target``, ``distance`` and
        ``bearing`` columns. The three wind columns are added to it in place.
    burn : list
        Identifiers already burnt. It is extended in place with the sources
        of ``fires``; a de-duplicated copy is returned.
    wind_bearing_max, wind_bearing_min, wind_distance : scalar
        Wind window and reach for the scenario.
    scenario, step : int
        Written to the returned frame and used in the export file name.

    Returns
    -------
    tuple
        ``(new_fires, burn)``: the spreading edges and the updated burn list.

    Side effects
    ------------
    Writes ``fires`` to ``output_scenario_{scenario}_step_{step}.parquet``
    in the module-level ``path_output`` directory.
    """
    # add columns so the wind conditions are exported with the active fires
    fires['wind_bearing_max'] = wind_bearing_max
    fires['wind_bearing_min'] = wind_bearing_min
    fires['wind_distance'] = wind_distance

    # wind speed -> neighbors selection from wind buffer
    within_reach = fires.distance < wind_distance

    # wind direction: bearing must fall inside the (min, max) window
    above_min = fires.bearing > wind_bearing_min
    below_max = fires.bearing < wind_bearing_max
    if wind_bearing_min <= wind_bearing_max:
        downwind = above_min & below_max
    else:
        # window wraps around, e.g. min=315 / max=45
        downwind = above_min | below_max

    # should not be already burnt
    not_burnt = ~fires.target.isin(burn)

    # copy so the new columns are not written onto a slice of `fires`
    new_fires = fires[within_reach & downwind & not_burnt].copy()

    # add columns
    new_fires['step'] = step
    new_fires['scenario'] = scenario

    # log burnt assets
    burn.extend(fires.source)
    # remove duplicates from burn list
    burn = list(set(burn))

    # export active fire to parquet for record
    fires.to_parquet(os.path.join(path_output, 
                                   'output_scenario_{}_step_{}.parquet'.format(scenario, step)), engine='pyarrow')
    
    return new_fires, burn

In [0]:
# spreading fire
def new_fires(fires, edges):
    """Return the rows of ``edges`` whose ``source`` is a ``target`` in ``fires``.

    In other words, every building reached by the current fires becomes a
    source and all of its outgoing edges are selected.
    """
    ignited_ids = fires.target.unique()
    starts_from_ignited = edges.source.isin(ignited_ids)
    return edges[starts_from_ignited]

In [0]:
# log burned assets
@dask.delayed
def record_burnt_assets_for_scenario(scenario):
    """Concatenate the per-step parquet exports of one scenario.

    Only files named ``output_scenario_{scenario}_step_*.parquet`` in the
    module-level ``path_output`` directory are read. The pattern is anchored
    on ``_step_`` so that scenario 1 does not also pick up files of scenario
    10, 100, ... or an existing ``output_scenario_{scenario}.parquet`` result.

    Parameters
    ----------
    scenario : int
        Scenario identifier used in the file names.

    Returns
    -------
    dask.dataframe.DataFrame
        Concatenation of all step files (wrapped by ``dask.delayed``, so the
        call itself returns a Delayed object).
    """
    pattern = os.path.join(path_output,
                           "output_scenario_{}_step_*.parquet".format(scenario))
    # sorted only to make the read order deterministic
    step_files = sorted(glob.glob(pattern))
    return dd.concat([dd.read_parquet(step_file) for step_file in step_files])

In [0]:
def count_fid_occurences(df, scenario):
    """Count how often each ``source`` appears and export the result.

    Parameters
    ----------
    df : dask DataFrame or Delayed wrapping one
        Burn records of a scenario; must expose a ``source`` column.
    scenario : int
        Scenario identifier, stored in the ``scenario`` column and used in
        the output file name.

    Side effects
    ------------
    Writes a frame with the columns ``source``, ``count`` and ``scenario``
    to ``output_scenario_{scenario}.parquet`` in the module-level
    ``path_output`` directory and prints a confirmation. Returns ``None``.
    """
    count = df.source.value_counts().compute()
    # When `df` is a Delayed object the first compute() can hand back a
    # still-lazy dask Series; materialise it before building the frame.
    if dask.is_dask_collection(count):
        count = count.compute()

    # Build the frame explicitly: the names value_counts() gives to the
    # Series and its index differ between pandas versions, which makes the
    # to_frame()/reset_index()/drop('index') sequence version dependent.
    count_df = pd.DataFrame({
        'source': count.index.to_numpy(),
        'count': count.to_numpy(),
        'scenario': scenario,
    })

    output_name = 'output_scenario_{}.parquet'.format(scenario)
    count_df.to_parquet(os.path.join(path_output, output_name), engine='pyarrow')
    print('{} created'.format(output_name))

In [0]:
# display network
def display_network(edge_list_dataframe):
    """Draw the network described by an edge list and return the graph.

    The graph is built with ``nx.from_pandas_edgelist`` (all remaining
    columns become edge attributes) and drawn with the Kamada-Kawai layout
    using small, semi-transparent red nodes without labels.
    """
    network = nx.from_pandas_edgelist(edge_list_dataframe, edge_attr=True)
    nx.draw_kamada_kawai(
        network,
        node_color='red',
        node_size=50,
        width=1,
        alpha=0.4,
        with_labels=False,
        font_weight='bold',
    )
    plt.show()
    return network

In [0]:
# run main
def main(number_of_scenarios, edges):
    """Run the fire-spread simulation for a number of scenarios.

    For every scenario: draw ignitions, draw one wind condition from
    ``GD_wind.csv``, then repeatedly filter the active fires and propagate
    them along ``edges`` until nothing spreads any more. The per-step
    exports are then aggregated into one file per scenario and removed.

    Parameters
    ----------
    number_of_scenarios : int
        How many scenarios to simulate.
    edges : pandas.DataFrame
        Edge list of the network. It is passed to ``ignitions`` and
        ``new_fires`` and bounds the number of steps.

    Notes
    -----
    Scenarios without any ignition are skipped and produce no output file.
    """
    # --- SCENARIOS
    print("number of scenarios : {}".format(number_of_scenarios))
    for scenario in range(number_of_scenarios):
        burned = []
        # use the `edges` argument, not a notebook-level global
        fires = ignitions(edges, scenario)
        if len(fires) == 0:
            # no fire
            continue
        # no filtering, just drawing the wind conditions for this scenario
        wind_bearing_max, wind_bearing_min, wind_distance = wind_scenario('GD_wind.csv')
        # --------- STEPS
        # len(edges) is only an upper bound; the loop stops once nothing spreads
        for step in range(len(edges)):
            fires, burned = conditions_spread(fires, burned, wind_bearing_max,
                                              wind_bearing_min, wind_distance,
                                              scenario, step)  # filtering
            if len(fires) == 0:
                # no more fire
                break
            fires = new_fires(fires, edges)

        record = record_burnt_assets_for_scenario(scenario)
        count_fid_occurences(record, scenario)
        # the per-step exports are no longer needed once aggregated
        clean_up_file(path_output, "*_step_*")

    return

**Clean up directories and load data**


---



In [0]:
# Remove every entry of the output directory whose name starts with "output"
# (results of earlier runs) before starting a new simulation.
clean_up_file(path_output, "output*")

In [0]:
# List the output directory to check what is left after the clean-up.
!ls '/content/drive/My Drive/05_Sync/FFE/FireNetwork/00_output'

dask_edge_list	shapefiles


In [0]:
%%time
%memit
# NOTE(review): the `%memit` line above has no statement after it, and the
# recorded output reports "increment: 0.00 MiB" - it does not appear to
# measure the load below. Confirm whether that was intended.
# load data
# EDGES = dd.read_parquet(os.path.join(path_output, 'dask_edge_list', 'edge_data.parquet'), engine='pyarrow')
# The edge list is loaded as a pandas DataFrame (the dask variant is kept above for reference).
EDGES = pd.read_parquet(os.path.join(path_output, 'dask_edge_list', 'edge_data.parquet'), engine='pyarrow')
print("number of edges : {}".format(len(EDGES)))
# G = display_network(EDGES)

peak memory: 1873.41 MiB, increment: 0.00 MiB
number of edges : 3457222
CPU times: user 1.07 s, sys: 483 ms, total: 1.55 s
Wall time: 1.42 s


**Run the algorithm**


---



In [0]:
%%time
%memit
# NOTE(review): `%memit` above has no statement after it, so it presumably
# does not profile the call to main() - confirm.
# run main
# Simulate 1001 scenarios on the loaded edge list; results are written to path_output.
main(1001, EDGES)

**Manage output and postprocessing**


---



In [0]:
# remove parquet_file
# clean_up_file(path_output, "output*")
# !ls '/content/drive/My Drive/05_Sync/FFE/FireNetwork/00_output'