
#Installation
---



In [3]:
from pathlib import Path
import os
from datetime import date
import re

In [4]:
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import box
import dask.dataframe as dd
pd.options.mode.chained_assignment = None

In [69]:
# Paths
p = Path('Z:\FFE')
pathShapefile = Path(p / 'shapefile')
pathParquets = Path(p / 'output')

print(len(os.listdir(pathParquets)))

10


In [90]:
def get_list_pids(path=pathParquets):
    pidList = []
    updtScenariolist = []
    files = pathParquets.glob('*.parquet')
    for file in files:
        regex = r"pid\d*"
        pidNames = re.findall(regex, str(file))
        for pidName in pidNames:
          print(f" file pid {pidName}")
          pidList.append(pidName)
    pidList = list(set(pidList))
    updtScenariolist = np.arange(0, 3000, 300).tolist()
    return pidList, updtScenariolist


def read_and_concatenate_parquets(pidList,incrList,path=pathParquets):
    L = []
    for idx, val in enumerate(pidList):
        files = pathParquets.glob('*'+str(val)+'.parquet')
        for file in files:
            print(file)
            pqt = pd.read_parquet(file, engine='auto')
            pqt['scenario']=pqt['scenario']+incrList[idx]
#             print(pqt.head())
            L.append(pqt)
            print(len(L))
    df = dd.concat(L)
    return df

    
def count_fid_occurences(df):
    count_df = df.groupby(['scenario', 'source'])[['source']].count().rename(columns={'count':'count'}).compute()
    count_df = count_df.rename(columns={'oldName1': 'newName1', 'oldName2': 'newName2'})
    count_df.to_parquet(str(pathShapefile) + '/' + f'CountBurn-{str(date.today())}.parquet', 
                        engine='auto', compression="GZIP")# could be datetime.now
    return count_df


def Merge(countDf,nameShapefile):
  # Shapefile
  gdfShape = gpd.read_file(pathShapefile / nameShapefile)
  gdfShape.insert(0, 'FID', range(0, len(gdfShape)))
  gdfShape.rename(columns={'FID': 'source'}, inplace=True)
  gdfShape = gdfShape[['source', 'geometry']]
  merged = countDf.merge(gdfShape, on=['source'], how='left')
  return merged


def createShapefile(df):
  # df = df.to_pandas()
  gdf = gpd.GeoDataFrame(df, geometry='geometry')
  # gdf.plot(column='count', cmap='hsv', legend=True)
  gdf.to_file(os.path.join(str(pathShapefile) + "/" + "Burn3000scenarioWellington.shp"))
  return gdf

In [91]:
p, i = get_list_pids()

 file pid pid167572
 file pid pid167573
 file pid pid167575
 file pid pid150543
 file pid pid150542
 file pid pid167574
 file pid pid150539
 file pid pid150540
 file pid pid167576
 file pid pid150541


In [92]:
%%time
concatDf = read_and_concatenate_parquets(pidList=p,incrList=i)

/Users/alex/PycharmProjects/FFE/data/test_parquets/scenario0_pid150540.parquet
1
/Users/alex/PycharmProjects/FFE/data/test_parquets/scenario0_pid167572.parquet
2
/Users/alex/PycharmProjects/FFE/data/test_parquets/scenario0_pid167576.parquet
3
/Users/alex/PycharmProjects/FFE/data/test_parquets/scenario0_pid150543.parquet
4
/Users/alex/PycharmProjects/FFE/data/test_parquets/scenario0_pid167573.parquet
5
/Users/alex/PycharmProjects/FFE/data/test_parquets/scenario0_pid167575.parquet
6
/Users/alex/PycharmProjects/FFE/data/test_parquets/scenario0_pid150539.parquet
7
/Users/alex/PycharmProjects/FFE/data/test_parquets/scenario0_pid150541.parquet
8
/Users/alex/PycharmProjects/FFE/data/test_parquets/scenario0_pid150542.parquet
9
/Users/alex/PycharmProjects/FFE/data/test_parquets/scenario0_pid167574.parquet
10
CPU times: user 390 ms, sys: 97.6 ms, total: 488 ms
Wall time: 394 ms


In [93]:
%%time
countConcatDf = count_fid_occurences(concatDf)
countConcatDf

CPU times: user 238 ms, sys: 36.5 ms, total: 274 ms
Wall time: 257 ms


In [None]:
%%time
mergedDf = Merge(countConcatDf, 'WellWHV_Buildings.shp')
countShape = createShapefile(mergedDf)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import contextily as ctx
%matplotlib inline
plt.style.use('seaborn-whitegrid')

FinnMeshblockShape = gpd.read_file(pathShapefile / 'Finn_MeshBlockSummary.shp')
countShape = gpd.read_file(pathShapefile / 'Burn3000scenarioWellington.shp')

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, sharex=True, sharey=True, figsize=(12, 10))

p1 = originalShape.plot(ax=ax1, column='WellWHV_Bu', cmap='YlOrRd', alpha=0.7, legend=True)
ctx.add_basemap(ax1, crs=2193)
p2 = countShape.plot(ax=ax2, column='count', cmap='YlOrRd', alpha=0.7, legend=True)
ctx.add_basemap(ax2, crs=2193)

ax1.set_title('Original burn count at meshblock level')
ax1.ticklabel_format(useOffset=False, style='plain')
ax2.set_title('Network burn count at individual building level')
ax2.ticklabel_format(useOffset=False, style='plain')
ax1.tick_params(direction='out', length=6)
ax2.tick_params(direction="out", length=6)

fig.autofmt_xdate()
plt.tight_layout()
plt.savefig(pathShapefile / 'ComparisonGISvsNetwork_3000Burn.png', dpi=600)
plt.show()