In [26]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import numpy as np
import pandas as pd
import dask.dataframe as dd
import geopandas as gpd

from pathlib import Path
import sys
import os
import glob
import multiprocessing as mp
from datetime import date

from tqdm.notebook import tqdm

from bokeh.plotting import figure, output_file, show

pd.options.mode.chained_assignment = None  # default='warn'
%load_ext memory_profiler

The memory_profiler extension is already loaded. To reload it, use:
  %reload_ext memory_profiler


In [27]:
%%time
# Folders
# All paths are resolved relative to the notebook's current working directory.
p = Path.cwd()
# Input/output data lives outside the repository, two directory levels above the notebook.
dataFolder = p.parent.parent / 'InOutRepoData' / 'FFE'
# Destination folder for the per-scenario parquet files written by the simulation runs.
# NOTE(review): 'OneScenarioOuput' looks like a typo for 'Output' — confirm against the
# folder name on disk before renaming anything.
folder = dataFolder / 'OneScenarioOuput'

# Data
# Wind records; the simulation reads them by column position (1: distance, 3: bearing).
wind_data = pd.read_csv(p / 'data' / 'Copy_of_GD_wind.csv')
# Edge list with columns source, target, distance, bearing and IgnProbBld.
edgelist = pd.read_parquet(p / 'data' / 'FinnShapeEdges_old.parquet', engine='pyarrow')

CPU times: user 342 ms, sys: 395 ms, total: 738 ms
Wall time: 50 s


In [28]:
# Quick look at the edge list (pandas abbreviates the display of large frames).
edgelist

Unnamed: 0,source,target,distance,bearing,IgnProbBld
0,0,41,60.968041,212.179941,0.000040
1,0,391,15.516658,177.323424,0.000040
2,0,2658,21.955722,211.123870,0.000040
3,0,5879,70.803928,136.343138,0.000040
4,0,6831,56.944753,315.930929,0.000040
...,...,...,...,...,...
3174925,73734,61496,50.604073,38.305974,0.000253
3174926,73734,64897,1.027726,314.500873,0.000253
3174927,73734,65052,50.627069,32.191654,0.000253
3174928,73734,70488,75.132354,231.759090,0.000253


In [29]:
# One row per source building together with its ignition probability.
# Selecting the columns and de-duplicating in a single non-inplace chain yields an
# independent frame, instead of mutating a slice of `edgelist` in place (which only
# worked silently because the chained-assignment warning is disabled).
rngFile = edgelist[['source', 'IgnProbBld']].drop_duplicates(['source'])
# Sanity check: both summaries should match the reference output recorded below.
rngFile.IgnProbBld.describe()
rngFile.source.describe()

count    69824.000000
mean         0.000129
std          0.000223
min          0.000024
25%          0.000061
50%          0.000092
75%          0.000124
max          0.005239
Name: IgnProbBld, dtype: float64

count    69824.000000
mean     36867.715742
std      21298.611168
min          0.000000
25%      18424.750000
50%      36860.500000
75%      55315.250000
max      73734.000000
Name: source, dtype: float64

Should look like this to match Finn's original run:


Out[6]:

count    69824.000000
mean         0.000129
std          0.000223
min          0.000024
25%          0.000061
50%          0.000092
75%          0.000124
max          0.005239
Name: IgnProbBld, dtype: float64

Out[6]:

count    69824.000000
mean     36867.715742
std      21298.611168
min          0.000000
25%      18424.750000
50%      36860.500000
75%      55315.250000
max      73734.000000
Name: source, dtype: float64

In [30]:
# Optional: check how many cores are available on this machine.
# num_cores = mp.cpu_count()
# print(num_cores)
# One-time setup, to be run in a shell / terminal (not in the notebook):
# conda install ipyparallel
# ipcluster nbextension enable --user
# ipcluster start # or: ipcluster start -n 4

In [31]:
# Connect to an ipyparallel cluster that is already running (e.g. started with `ipcluster start`).
import ipyparallel as ipp
client = ipp.Client()
dview = client[:]# direct view over ALL connected engines; the full slice does not cap the engine count
# Ids of the engines currently registered with the client.
client.ids

[0, 1, 2, 3]

In [32]:
# add variables to all engines
dview["edgelist"]=edgelist
dview["rngFile"]=rngFile
dview["wind_data"]=wind_data
dview["folder"]=folder

# add all libraries to engines
with dview.sync_imports():
    import numpy as np
    import pandas as pd
    import dask.dataframe as dd
    import geopandas as gpd

    from pathlib import Path
    import sys
    import os
    import glob
    import multiprocessing as mp
    from datetime import date

    from tqdm.notebook import tqdm

importing numpy on engine(s)
importing pandas on engine(s)
importing dask.dataframe on engine(s)
importing geopandas on engine(s)
importing Path from pathlib on engine(s)
importing sys on engine(s)
importing os on engine(s)
importing glob on engine(s)
importing multiprocessing on engine(s)
importing date from datetime on engine(s)
importing tqdm from tqdm.notebook on engine(s)


In [33]:
%%px

def wind_scenario(wind_data):
    """Derive the fire-spread sector and reach from the first wind record.

    Parameters
    ----------
    wind_data : pandas.DataFrame
        Wind table, read by column position: column 1 is used as the spread
        distance and column 3 as the bearing the wind is coming from.
        Only row 0 is used.

    Returns
    -------
    tuple
        ``(bear_max, bear_min, dist)``: upper and lower bearing of the spread
        sector (spread direction +/- 45 degrees) and the spread distance.

    Notes
    -----
    A bearing of 999 is handled as a sentinel that opens the sector to
    ``(0, 999)``, i.e. every bearing qualifies (presumably calm/variable
    wind; confirm against the data dictionary). The sentinel is tested
    BEFORE the 180 degree flip: previously the flip turned 999 into 1179,
    so the sentinel branch could never trigger.
    """
    i = 0  # only the first wind record is used
    dist = wind_data.values[i, 1]
    b = wind_data.values[i, 3]

    if b == 999:
        # Sentinel: no directional restriction.
        bear_max = 999
        bear_min = 0
    else:
        # need correction as bearing is where the wind is COMING FROM!
        if 180 < b <= 360:
            b = b - 180
        else:
            b = b + 180

        bear_max = b + 45  # wind direction
        bear_min = b - 45
        if b == 360:
            # Spread due north: the upper bound wraps past 360 back to 45.
            bear_max = 45
        if b <= 0:  # should not be necessary
            bear_min = 0

    print(f"w_direction : {b}, w_bearing_max : {bear_max}, w_bearing_min : {bear_min}, w_distance : {dist}")
    return bear_max, bear_min, dist  # wind characteristics, bearing and distance


def ignition(rngList=rngFile, edges=edgelist):
    """Draw the initial ignitions and return the edges leaving the ignited buildings.

    Parameters
    ----------
    rngList : pandas.DataFrame
        One row per building with columns ``source`` and ``IgnProbBld``.
    edges : pandas.DataFrame
        Edge list with (at least) a ``source`` column.

    Returns
    -------
    tuple
        ``(NewActiveEdges, initialIgnitions)``: the rows of ``edges`` whose
        source ignited, and the number of ignited buildings.

    Notes
    -----
    The random draws are kept in a local array rather than written into
    ``rngList``: the default argument is a frame shared by every call, and
    the previous implementation added/overwrote an ``rng`` column on it each
    time the function ran.
    """
    import numpy as np

    # One uniform draw per building; a building ignites when its draw falls
    # below its ignition probability.
    draws = np.random.uniform(0, 1, size=len(rngList))
    ignited = rngList[draws < rngList['IgnProbBld'].values]
    initialIgnitions = len(ignited)
    NewActiveEdges = edges[edges['source'].isin(ignited['source'])]
    return NewActiveEdges, initialIgnitions


def mask(t, activeEdges_d, listActivatedSources_d, w_b_max, w_b_min, w_d):
    """Keep the active edges that satisfy the wind sector and distance limits.

    Parameters
    ----------
    t : int
        Simulation step. At ``t == 0`` the edges are returned unfiltered.
    activeEdges_d : pandas.DataFrame
        Candidate edges with columns ``source``, ``bearing`` and ``distance``.
    listActivatedSources_d : list
        Sources that already burned; their edges are dropped.
    w_b_max, w_b_min : float
        Upper and lower bearing of the spread sector, in degrees.
    w_d : float
        Maximum spread distance.

    Returns
    -------
    pandas.DataFrame
        The edges whose bearing lies strictly inside the sector, whose
        distance is below ``w_d`` and whose source has not burned yet.

    Notes
    -----
    The previous version tested ``bearing < w_b_min`` (instead of a lower
    bound), so the sector was effectively ``bearing < min(w_b_max, w_b_min)``.
    The sector test is done modulo 360 so that sectors wrapping through north
    (e.g. min=315, max=45, or bounds outside 0..360) are handled; a sector
    spanning 360 degrees or more (such as the 0..999 sentinel) accepts every
    bearing.
    """
    import numpy as np

    if t == 0:  # special case at time=0
        return activeEdges_d

    bearings = activeEdges_d.bearing.values
    sector_width = w_b_max - w_b_min
    if sector_width >= 360:
        # Full circle: no directional restriction.
        in_sector = np.ones(len(activeEdges_d), dtype=bool)
    else:
        # Angular offset of each edge from the lower bound, measured clockwise.
        offset = np.mod(bearings - w_b_min, 360)
        in_sector = (offset > 0) & (offset < np.mod(sector_width, 360))

    within_reach = activeEdges_d.distance.values < w_d
    NewActiveEdges = activeEdges_d[in_sector & within_reach]
    # Buildings that already burned cannot burn again.
    NewActiveEdges = NewActiveEdges[~NewActiveEdges.source.isin(listActivatedSources_d)]
    return NewActiveEdges


def propagation(activeEdges_d, edges=edgelist):
    """Return the edges that leave the buildings reached by the active edges.

    Every ``target`` of ``activeEdges_d`` is looked up as a ``source`` in
    ``edges``; the matching rows are returned.
    """
    import numpy as np
    import pandas as pd
    reached_buildings = activeEdges_d['target']
    starts_at_reached = edges['source'].isin(reached_buildings)
    return edges[starts_at_reached]

In [34]:
@dview.parallel(block = False) # The @parallel decorator breaks up elementwise operations and distributes them.
def ffe_runs(n):
    """Run one fire-spread simulation per scenario id and save each result to parquet.

    Parameters
    ----------
    n : iterable
        Scenario identifiers handled by this call (the decorator distributes
        the full sequence across the engines).

    For every scenario the function draws the initial ignitions, then spreads
    the fire step by step until no new edges remain. The burned buildings
    (one row per source and time step) are written to
    ``<folder>/scenario<id>_<today>.parquet``. Scenarios without any ignition
    are skipped and produce no file.

    Relies on ``wind_data``, ``folder``, ``wind_scenario``, ``ignition``,
    ``mask`` and ``propagation`` being defined in the namespace it runs in.
    """
    # Imports are local because the function body executes on the remote engines.
    import pandas as pd
    from datetime import date
    from pathlib import Path
    from tqdm.notebook import tqdm

    for scenario in tqdm(n):
        # initial setup
        listActivatedSources = []
        listScenarioDataframes = []
        time = 0
        # wind conditions
        w_bearing_max, w_bearing_min, w_distance = wind_scenario(wind_data)
        # ignition / initial state and edges selection
        ActiveEdges, numberIgnitions = ignition()
        if ActiveEdges.empty:
            print(f"no ignitions {numberIgnitions}")
            continue
        while True: # spread burn zone
            ActiveEdges = mask(time, ActiveEdges, listActivatedSources, w_bearing_max, w_bearing_min, w_distance)
            if ActiveEdges.empty: #no more buildings to burn
                break
            # One row per burning building, stamped with the current step.
            # assign() builds a new frame, so no column is written onto a
            # frame derived from a filtered slice.
            burns = ActiveEdges.drop_duplicates(['source']).assign(time=time)
            listScenarioDataframes.append(burns)
            listActivatedSources.extend(ActiveEdges.source.values)
            ActiveEdges = propagation(ActiveEdges)
            time += 1

        print(f'finishing scenario --- {scenario} time ---- {time} \n started with {numberIgnitions} ignitions ')

        Activations = pd.concat(listScenarioDataframes)
        Activations["scenario"] = scenario
        Activations["InitialIgnitions"] = numberIgnitions
        output_path = Path(folder) / f'scenario{scenario}_{date.today()}.parquet'
        Activations.to_parquet(str(output_path), engine='auto', compression="GZIP")

In [35]:
# Launch 3000 scenarios across the engines. The function is declared with block=False,
# so this call returns an AsyncMapResult immediately instead of waiting for the runs.
# NOTE(review): nothing in the notebook waits on this result, so errors raised on the
# engines would go unnoticed — consider keeping the handle and waiting on it (confirm
# the AsyncMapResult API in the ipyparallel docs).
ffe_runs(range(3000))

<AsyncMapResult: ffe_runs>