<a href="https://colab.research.google.com/github/37stu37/FFE/blob/master/FFErunsParallel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
# Mount Google Drive at /content/drive so the notebook can reach files stored there.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [15]:
!pwd

/content/drive/My Drive/Colab Notebooks/01_Repository/FFE


**Imports**
---



In [16]:
%cd /content/drive/My Drive/Colab Notebooks/01_Repository/FFE

/content/drive/My Drive/Colab Notebooks/01_Repository/FFE


In [None]:
%%capture
!pip install memory_profiler
!pip install git+https://github.com/dask/fastparquet

In [18]:
import numpy as np
import pandas as pd
from pathlib import Path
import memory_profiler as mem_profile
import sys
import os
import glob
import multiprocessing as mp
# import zipfile
from zipfile import ZipFile 


pd.options.mode.chained_assignment = None  # default='warn'

**Load data from Parquet and CSV files**
---



In [19]:
# Input files (relative paths, resolved against the current working directory).
edge_file = './output/FinnShapeEdges.parquet'  # edge table, read with pd.read_parquet
wind_file = './data/Copy of GD_wind.csv'  # wind table, read with pd.read_csv
# Output directory: ffe_runs writes its parquet result files here.
folder = '../../02_Output/ffeFinnComparison'

In [20]:
# load data
# wind_data: wind table from which wind_scenario draws one random row per scenario
wind_data = pd.read_csv(wind_file) 
# edgelist: edge table (source, target, distance, bearing, IgnProbBld) used as the
# default input of ignition() and propagation()
edgelist = pd.read_parquet(edge_file, engine='pyarrow')

In [21]:
edgelist

Unnamed: 0,source,target,distance,bearing,IgnProbBld
0,0,41,60.968041,212.179941,0.000040
1,0,391,15.516658,177.323424,0.000040
2,0,2658,21.955722,211.123870,0.000040
3,0,5879,70.803928,136.343138,0.000040
4,0,6831,56.944753,315.930929,0.000040
...,...,...,...,...,...
3174925,73734,61496,50.604073,38.305974,0.000253
3174926,73734,64897,1.027726,314.500873,0.000253
3174927,73734,65052,50.627069,32.191654,0.000253
3174928,73734,70488,75.132354,231.759090,0.000253


**Definitions**
---



In [65]:
# %%timeit
def wind_scenario(wind_data):
    """Draw one random wind record and derive the bearing window used for spread.

    Parameters
    ----------
    wind_data : pandas.DataFrame
        Wind table. Columns are read by position only: column 1 is returned as
        the distance and column 3 is used as the wind bearing ``b``.

    Returns
    -------
    tuple
        ``(bear_max, bear_min, dist)``. By default the window is
        ``b + 45`` / ``b - 45``, with these overrides applied in order:
        ``b == 360`` sets ``bear_max`` to 45, ``b <= 0`` sets ``bear_min`` to 0,
        and ``b == 999`` sets the window to ``(999, 0)``.

    Raises
    ------
    ValueError
        If ``wind_data`` has no rows.
    """
    # Materialise the array once: every `.values` access on a DataFrame may
    # build a fresh copy of the whole table.
    records = wind_data.values
    n_records = records.shape[0]
    if n_records == 0:
        raise ValueError("wind_data is empty: cannot draw a wind scenario")

    i = np.random.randint(0, n_records)
    dist = records[i, 1]
    b = records[i, 3]

    bear_max = b + 45  # wind direction
    bear_min = b - 45
    # NOTE(review): the window is not wrapped around 0/360 (e.g. b == 350 gives
    # bear_max == 395, b == 360 gives bear_max < bear_min) - confirm how the
    # caller is expected to treat bearings on the other side of north.
    if b == 360:
        bear_max = 45
    if b <= 0:  # should not be necessary
        bear_min = 0
    if b == 999:  # presumably a code for "no dominant direction" - TODO confirm
        bear_max = 999
        bear_min = 0
    return bear_max, bear_min, dist  # wind characteristics, bearing and distance


def ignition(edges=edgelist):
    """Randomly select the edges that ignite at the start of a scenario.

    One uniform random number in [0, 1) is drawn per row of ``edges``; a row is
    kept when its draw is strictly below the row's ``IgnProbBld`` value.

    Parameters
    ----------
    edges : pandas.DataFrame
        Edge table with an ``IgnProbBld`` column. Defaults to the module-level
        ``edgelist`` as it existed when this function was defined.

    Returns
    -------
    pandas.DataFrame
        The subset of ``edges`` selected by the draw (possibly empty).
    """
    # len() gives the row count without building a dense copy of the whole
    # table, which `edges.values.shape[0]` would do.
    draws = np.random.uniform(0, 1, size=len(edges))
    ignited = draws < edges.IgnProbBld.values
    return edges[ignited]


def mask(t, activeEdges_d, listActivatedSources_d, w_b_max, w_b_min, w_d):
    """Filter the active edges by wind window, distance and already-burnt sources.

    Parameters
    ----------
    t : int
        Time step. At ``t == 0`` no filtering is applied.
    activeEdges_d : pandas.DataFrame
        Candidate edges with ``source``, ``bearing`` and ``distance`` columns.
    listActivatedSources_d : list-like
        Sources that were already activated; edges starting from them are dropped.
    w_b_max, w_b_min : number
        Upper and lower bounds of the wind bearing window (both exclusive).
    w_d : number
        Maximum distance (exclusive).

    Returns
    -------
    pandas.DataFrame
        ``activeEdges_d`` itself at ``t == 0``; otherwise the rows whose bearing
        lies strictly between ``w_b_min`` and ``w_b_max``, whose distance is
        below ``w_d`` and whose source is not in ``listActivatedSources_d``.
    """
    if t == 0:  # special case at time=0
        return activeEdges_d

    bearings = activeEdges_d.bearing.values
    # The lower bound must be compared with ">": testing "< w_b_min" made the
    # upper bound redundant and kept only edges *outside* the wind window.
    in_window = (bearings > w_b_min) & (bearings < w_b_max)
    in_reach = activeEdges_d.distance.values < w_d

    candidates = activeEdges_d[in_window & in_reach]
    return candidates[~candidates.source.isin(listActivatedSources_d)]


def propagation(activeEdges_d, edges=edgelist):
    """Return the edges leaving the nodes reached by the currently active edges.

    An edge of ``edges`` is selected when its ``source`` appears among the
    ``target`` values of ``activeEdges_d``. ``edges`` defaults to the
    module-level ``edgelist`` as it existed when this function was defined.
    """
    reached_nodes = activeEdges_d.target
    starts_from_reached = edges.source.isin(reached_nodes)
    return edges[starts_from_reached]

def clean_up(path):
    """Delete every file matching the glob pattern ``path``.

    The number of matches is printed before the deletions start; each match is
    then removed with ``os.remove``.
    """
    doomed = glob.glob(path)
    match_count = len(doomed)
    print(" {} files removed".format(match_count))
    for file_path in doomed:
        os.remove(file_path)
      
def ffe_runs(n):
    """Simulate ``n`` fire-spread scenarios and write the activated edges to parquet.

    For each scenario a wind record is drawn (``wind_scenario``), an initial set
    of edges is ignited (``ignition``) and the fire is stepped with ``mask`` and
    ``propagation`` until no active edge remains. The active edges of every step
    are collected, tagged with the index of the scenario that produced them and
    with the process id, and written to a single file
    ``<folder>/scenario<last index>_pid<pid>_Activations.parquet``.

    Relies on the module-level ``wind_data`` and ``folder`` (and on ``edgelist``
    through the defaults of ``ignition`` and ``propagation``).

    Parameters
    ----------
    n : int
        Number of scenarios to simulate in this call.

    Returns
    -------
    None
        Results are written to disk. Nothing is written when no scenario
        produced any activation (including ``n == 0``).
    """
    # Forked pool workers inherit an identical copy of NumPy's global RNG state;
    # without reseeding, every worker would replay exactly the same draws and
    # produce duplicate scenarios. seed() with no argument pulls fresh OS entropy.
    np.random.seed()

    pid = os.getpid()
    listScenarioDataframes = []
    for scenario in range(n):
        # initial setup
        listActivatedSources = []
        time = 0
        # wind conditions
        w_bearing_max, w_bearing_min, w_distance = wind_scenario(wind_data)
        # ignition / initial state and edges selection
        ActiveEdges = ignition()
        while not ActiveEdges.empty:
            ActiveEdges = mask(time, ActiveEdges, listActivatedSources, w_bearing_max, w_bearing_min, w_distance)
            if ActiveEdges.empty:
                break
            # Tag rows here, while the scenario index is known; tagging after the
            # loop labelled every row with the index of the last scenario.
            listScenarioDataframes.append(ActiveEdges.assign(scenario=scenario))
            listActivatedSources.extend(ActiveEdges.source.values)
            ActiveEdges = propagation(ActiveEdges)
            time += 1

    if not listScenarioDataframes:
        # pd.concat raises on an empty list, and `scenario` is unbound when n == 0.
        print("no activations to save, process id: {}".format(pid))
        return

    print("scenario : {}, process id: {}".format(scenario, pid))
    Activations = pd.concat(listScenarioDataframes)
    Activations["pid"] = pid
    os.makedirs(str(folder), exist_ok=True)
    # NOTE(review): the file name depends only on the last scenario index and the
    # pid, so a worker that executes two tasks with the same `n` overwrites its
    # first result - confirm whether a unique suffix is needed.
    # change to 'auto' as pyarrow is giving problem
    Activations.to_parquet(str(folder) + '/' + f'scenario{scenario}_pid{pid}_Activations.parquet', engine='auto', compression="GZIP")

**Main**
---


---



In [64]:
# run process on all available cores - need to input number of scenarios required
%%time
n_scenario = [2] * mp.cpu_count()
pool = mp.Pool()
results = pool.map(ffe_runs, n_scenario)

scenario : 1, process id: 2218
scenario : 1, process id: 2217
scenario : 1, process id: 2215
scenario : 1, process id: 2216
CPU times: user 141 ms, sys: 170 ms, total: 312 ms
Wall time: 23.7 s



**Backup**
---



---



In [61]:
# Deletes every file matching the pattern below.
# NOTE(review): this looks like the directory ffe_runs writes to (the relative
# `folder` path resolved from the notebook's working directory) - running this
# cell after Main would remove the freshly written results; confirm before Run All.
clean_up('/content/drive/My Drive/Colab Notebooks/02_Output/ffeFinnComparison/scenario*')

 4 files removed


In [None]:
# pqt = pd.read_parquet("/content/drive/My Drive/04_Cloud/01_Work/GNS/008_FFE/runs/output/scenario0_pid1998_Activations.parquet")

In [None]:
# num_cores = multiprocessing.cpu_count()
# print(num_cores)