<a href="https://colab.research.google.com/github/37stu37/FFE/blob/master/FFErunsParallel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Imports**
---



In [7]:
!pip install dispy



In [8]:
%cd /content/drive/My Drive/Colab Notebooks/01_Repository/FFE

/content/drive/My Drive/Colab Notebooks/01_Repository/FFE


In [9]:
import numpy as np
import pandas as pd
from pathlib import Path
import sys
import os
import glob
import multiprocessing as mp


pd.options.mode.chained_assignment = None  # default='warn'

**Load data from zip file**
---



In [10]:
edge_file = './output/FinnShapeEdges.parquet'
wind_file = './data/Copy of GD_wind.csv'
folder = '../../02_Output/ffeFinnComparison'

In [11]:
# load data
wind_data = pd.read_csv(wind_file) 
edgelist = pd.read_parquet(edge_file, engine='pyarrow')

***probability of Ignition must be divided by the number of time the "source" is present in the edge list !!!!***

In [12]:
edgelist['IgnProbBld'].describe()

count    3.174930e+06
mean     1.198209e-04
std      1.813387e-04
min      2.440851e-05
25%      5.970669e-05
50%      9.173903e-05
75%      1.227407e-04
max      5.239063e-03
Name: IgnProbBld, dtype: float64

In [13]:
print(len(edgelist), "&", len(edgelist.source.unique()))

3174930 & 69824


In [14]:
FreqCorrection = edgelist[["source"]]
FreqCorrection['freq'] = edgelist.groupby('source')['source'].transform('count')
FreqCorrection.drop_duplicates(inplace=True)
FreqCorrection

Unnamed: 0,source,freq
0,0,46
46,1,41
87,2,56
143,3,34
177,4,46
...,...,...
3174708,73730,57
3174765,73731,37
3174802,73732,23
3174825,73733,58


In [15]:
edgelist = edgelist.merge(FreqCorrection, on=['source'], how='left')
edgelist['IgnProbBld'] = edgelist['IgnProbBld'] / edgelist['freq']
edgelist

Unnamed: 0,source,target,distance,bearing,IgnProbBld,freq
0,0,41,60.968041,212.179941,8.664816e-07,46
1,0,391,15.516658,177.323424,8.664816e-07,46
2,0,2658,21.955722,211.123870,8.664816e-07,46
3,0,5879,70.803928,136.343138,8.664816e-07,46
4,0,6831,56.944753,315.930929,8.664816e-07,46
...,...,...,...,...,...,...
3174925,73734,61496,50.604073,38.305974,5.380858e-06,47
3174926,73734,64897,1.027726,314.500873,5.380858e-06,47
3174927,73734,65052,50.627069,32.191654,5.380858e-06,47
3174928,73734,70488,75.132354,231.759090,5.380858e-06,47


In [None]:
# corrected edgelist with proper Ignition probability
edgelist.drop("freq", axis=1, inplace=True)

**Definitions**
---

In [21]:
# %%timeit
def wind_scenario(wind_data):
      i = np.random.randint(0, wind_data.values.shape[0])
      w = wind_data.values[i, 2]
      dist = wind_data.values[i, 1]
      b = wind_data.values[i, 3]
      bear_max = b + 45  # wind direction
      bear_min = b - 45
      if b == 360:
          bear_max = 45
      if b <= 0:  # should not be necessary
          bear_min = 0
      if b == 999:
          bear_max = 999
          bear_min = 0
      return bear_max, bear_min, dist # wind characteristics, bearing and distance


def ignition(edges=edgelist):
    rng = np.random.uniform(0, 1, size=edges.values.shape[0])
    mask = rng < edges.IgnProbBld.values
    NewActiveEdges = edges[mask]
    return NewActiveEdges


def mask(t, activeEdges_d, listActivatedSources_d, w_b_max, w_b_min, w_d):
    if t==0: # special case at time=0
        return activeEdges_d
    else:
        mask = (activeEdges_d.bearing.values < w_b_max) & (activeEdges_d.bearing.values < w_b_min) & (activeEdges_d.distance < w_d)
        NewActiveEdges = activeEdges_d[mask]
        NewActiveEdges = NewActiveEdges[~NewActiveEdges.source.isin(listActivatedSources_d)]
        return NewActiveEdges


def propagation(activeEdges_d, edges=edgelist):
    NewActiveEdges = edges[edges.source.isin(activeEdges_d.target)]
    return NewActiveEdges


def clean_up(path):
    files = glob.glob(path)
    print(" {} files removed".format(len(files)))
    for f in files:
      os.remove(f)


def ffe_runs(n):
    for scenario in range(n):
        # initial setup
        listActivatedSources = []
        listScenarioDataframes = []
        condition = True
        time = 0 
        # wind conditions
        w_bearing_max, w_bearing_min, w_distance = wind_scenario(wind_data)
        # ignition / initial state and edges selection
        ActiveEdges = ignition()
        # print(f"{len(ActiveEdges)} ignitions")
        if ActiveEdges.empty:
            continue
        while condition: # spread burn zone
            ActiveEdges = mask(time, ActiveEdges, listActivatedSources, w_bearing_max, w_bearing_min, w_distance)
            if ActiveEdges.empty: #no more buildings to burn
                break
            listScenarioDataframes.append(ActiveEdges)
            listActivatedSources.extend(ActiveEdges.source.values)
            ActiveEdges = propagation(ActiveEdges)
            time += 1
        
        print(f'finishing pid {os.getpid()} scenario --- {scenario} time ---- {time}')

        Activations = pd.concat(listScenarioDataframes)
        Activations["scenario"] = scenario
        Activations["pid"] = os.getpid()
        Activations.to_parquet(str(folder) + '/' + f'scenario{scenario}_pid{os.getpid()}.parquet', engine='auto', compression="GZIP")
        

In [18]:
# ffe_runs(20)

**Main**
---


---



In [23]:
# run process on all available cores - need to input number of scenarios required
# %%time
scenario_needed = 3000
listPool = [int(scenario_needed/mp.cpu_count())] * mp.cpu_count() # to avoid RAM overload on CPU

print(f'{mp.cpu_count()} cores for {sum(listPool)} scenarios')
print(f'{len(listPool)} processes (pid) with {listPool[0]} scenarios each')
print(f'A total of {sum(listPool)} scenarios will be created')

4 cores for 3000 scenarios
4 processes (pid) with 750 scenarios each
A total of 3000 scenarios will be created


In [None]:
%%time
# multiprocessing Main
print("Started")
p = mp.Pool()
results = p.map(ffe_runs, listPool)
print("Complete")

In [None]:
%%time
# multiprocessing Main
if __name__ == '__main__':
    worker_count = 4
    worker_pool = []
    for _ in range(worker_count):
        p = mp.Process(target=ffe_runs, args=(750,))
        p.start()
        worker_pool.append(p)
    for p in worker_pool:
        p.join()

finishing pid 1415 scenario --- 0 time ---- 41
finishing pid 1418 scenario --- 0 time ---- 41
finishing pid 1416 scenario --- 0 time ---- 41
finishing pid 1417 scenario --- 0 time ---- 41
finishing pid 1415 scenario --- 1 time ---- 14
finishing pid 1418 scenario --- 1 time ---- 14
finishing pid 1416 scenario --- 1 time ---- 14
finishing pid 1417 scenario --- 1 time ---- 14
finishing pid 1415 scenario --- 2 time ---- 44
finishing pid 1418 scenario --- 2 time ---- 44
finishing pid 1416 scenario --- 2 time ---- 44
finishing pid 1417 scenario --- 2 time ---- 44



**Backup**
---



---



In [30]:
len(os.listdir(folder))

117

In [31]:
clean_up('/content/drive/My Drive/Colab Notebooks/02_Output/ffeFinnComparison/scenario*')

 116 files removed


In [None]:
# pqt = pd.read_parquet("/content/drive/My Drive/04_Cloud/01_Work/GNS/008_FFE/runs/output/scenario0_pid1998_Activations.parquet")

In [None]:
# num_cores = multiprocessing.cpu_count()
# print(num_cores)