<a href="https://colab.research.google.com/github/37stu37/FFE/blob/master/FFErunsParallel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pwd

/content


**Imports**
---



In [3]:
%cd /content/drive/My Drive/Colab Notebooks/01_Repository/FFE

/content/drive/My Drive/Colab Notebooks/01_Repository/FFE


In [4]:
%%capture
!pip install memory_profiler
!pip install git+https://github.com/dask/fastparquet

In [5]:
import numpy as np
import pandas as pd
from pathlib import Path
import memory_profiler as mem_profile
import sys
import os
import glob
import multiprocessing as mp
# import zipfile
from zipfile import ZipFile 


pd.options.mode.chained_assignment = None  # default='warn'

**Load data from zip file**
---



In [6]:
edge_file = './output/FinnShapeEdges.parquet'
wind_file = './data/Copy of GD_wind.csv'
folder = '../../02_Output/ffeFinnComparison'

In [7]:
# load data
wind_data = pd.read_csv(wind_file) 
edgelist = pd.read_parquet(edge_file, engine='pyarrow')

**Definitions**
---

In [9]:
# %%timeit
def wind_scenario(wind_data):
      i = np.random.randint(0, wind_data.values.shape[0])
      w = wind_data.values[i, 2]
      dist = wind_data.values[i, 1]
      b = wind_data.values[i, 3]
      bear_max = b + 45  # wind direction
      bear_min = b - 45
      if b == 360:
          bear_max = 45
      if b <= 0:  # should not be necessary
          bear_min = 0
      if b == 999:
          bear_max = 999
          bear_min = 0
      return bear_max, bear_min, dist # wind characteristics, bearing and distance


def ignition(edges=edgelist):
    rng = np.random.uniform(0, 1, size=edges.values.shape[0])
    mask = rng < edges.IgnProbBld.values
    NewActiveEdges = edges[mask]
    return NewActiveEdges


def mask(t, activeEdges_d, listActivatedSources_d, w_b_max, w_b_min, w_d):
    if t==0: # special case at time=0
        return activeEdges_d
    else:
        mask = (activeEdges_d.bearing.values < w_b_max) & (activeEdges_d.bearing.values < w_b_min) & (activeEdges_d.distance < w_d)
        NewActiveEdges = activeEdges_d[mask]
        NewActiveEdges = NewActiveEdges[~NewActiveEdges.source.isin(listActivatedSources_d)]
        return NewActiveEdges


def propagation(activeEdges_d, edges=edgelist):
    NewActiveEdges = edges[edges.source.isin(activeEdges_d.target)]
    return NewActiveEdges


def clean_up(path):
    files = glob.glob(path)
    print(" {} files removed".format(len(files)))
    for f in files:
      os.remove(f)


def ffe_runs(n):
    for scenario in range(n):
        # initial setup
        listActivatedSources = []
        listScenarioDataframes = []
        condition = True
        time = 0 
        # wind conditions
        w_bearing_max, w_bearing_min, w_distance = wind_scenario(wind_data)
        # ignition / initial state and edges selection
        ActiveEdges = ignition()
        if ActiveEdges.empty:
            continue
        while condition: # spread burn zone
            ActiveEdges = mask(time, ActiveEdges, listActivatedSources, w_bearing_max, w_bearing_min, w_distance)
            if ActiveEdges.empty: #no more buildings to burn
                break
            listScenarioDataframes.append(ActiveEdges)
            listActivatedSources.extend(ActiveEdges.source.values)
            ActiveEdges = propagation(ActiveEdges)
            time += 1
        
        print(f'finishing pid {os.getpid()} scenario --- {scenario} time ---- {time}')

        Activations = pd.concat(listScenarioDataframes)
        Activations["scenario"] = scenario
        Activations["pid"] = os.getpid()
        Activations.to_parquet(str(folder) + '/' + f'scenario{scenario}_pid{os.getpid()}.parquet', engine='auto', compression="GZIP")
        

**Main**
---


---



In [12]:
# run process on all available cores - need to input number of scenarios required
# %%time
scenario_needed = 3000
listPool = [int(scenario_needed/mp.cpu_count())] * mp.cpu_count() # to avoid RAM overload on CPU

print(f'{mp.cpu_count()} cores for {sum(listPool)} scenarios')
print(f'{len(listPool)} processes (pid) with {listPool[0]} scenarios each')
print(f'A total of {sum(listPool)} will be created')

4 cores for 3000 scenarios
4 processes (pid) with 750 scenarios each
A total of 3000 will be created


In [None]:
%%time
# multiprocessing
print("Started")
p = mp.Pool()
results = p.map(ffe_runs, listPool)
print("Complete")

Started
finishing pid 23070 scenario --- 0 time ---- 22
finishing pid 23073 scenario --- 0 time ---- 22
finishing pid 23071 scenario --- 0 time ---- 22
finishing pid 23072 scenario --- 0 time ---- 22
finishing pid 23070 scenario --- 1 time ---- 1
finishing pid 23073 scenario --- 1 time ---- 1
finishing pid 23071 scenario --- 1 time ---- 1
finishing pid 23072 scenario --- 1 time ---- 1
finishing pid 23070 scenario --- 2 time ---- 13
finishing pid 23073 scenario --- 2 time ---- 13
finishing pid 23071 scenario --- 2 time ---- 13
finishing pid 23072 scenario --- 2 time ---- 13
finishing pid 23070 scenario --- 3 time ---- 11
finishing pid 23073 scenario --- 3 time ---- 11
finishing pid 23071 scenario --- 3 time ---- 11
finishing pid 23072 scenario --- 3 time ---- 11
finishing pid 23070 scenario --- 4 time ---- 1
finishing pid 23073 scenario --- 4 time ---- 1
finishing pid 23071 scenario --- 4 time ---- 1
finishing pid 23072 scenario --- 4 time ---- 1
finishing pid 23073 scenario --- 5 time 


**Backup**
---



---



In [None]:
os.listdir(folder)

In [None]:
# clean_up('/content/drive/My Drive/Colab Notebooks/02_Output/ffeFinnComparison/scenario*')

In [None]:
# pqt = pd.read_parquet("/content/drive/My Drive/04_Cloud/01_Work/GNS/008_FFE/runs/output/scenario0_pid1998_Activations.parquet")

In [None]:
# num_cores = multiprocessing.cpu_count()
# print(num_cores)