In [2]:
""" 
Notebook for importing and spatialising csv output from HazImp, for QA/QC

This notebook searches the specified folder and locates .csv file output (building points and aggregated output) produced by HazImp. This notebook can loop through the output from multiple scenarios.

This notebook has been tested with the following folder structures: (1) specified folder -> scenario code folder -> output.csv, and (2) specified folder -> scenario code folder -> another identifier folder -> output.csv. Therefore the .csv output files can be located in the specified folder itself, or one or two folders deeper.

This notebook will use the name of the folder containing the .csv to name the spatialised output.

The point and aggregated output files must be the only .csv files within the specified folder structure. Aggregated output must be named ...agg.csv; every other .csv file is treated as point output.

***
This notebook requires an environment which includes the arcgis and arcpy modules.
If you do not have an environment with these modules you can use the default ESRI environment

In ArcGIS Pro main menu (or Project menu) select 'Package Manager'
Top right next to 'Active Environment' select the cog button
clone and rename the default environment, save somewhere appropriate
Set this environment as the environment kernel when running this notebook
***

"""

# ArcGIS API for Python
import arcgis
from arcgis.gis import GIS
# GIS() is called with no arguments here.
# NOTE(review): `gis` is not referenced in the cells visible in this notebook -
# confirm whether this connection is still needed.
gis = GIS()
# os is used for walking the HazImp output folders and building output paths
import os
# arcpy supplies the geoprocessing tools and the metadata classes (md) used below
import arcpy
from arcpy import metadata as md
# time is used to build the timestamp suffix added to every output name
import time

In [3]:
"""
Environment and file locations
This is the only section that should need to be edited

The arcpy workspace environment is commented out because explicit output locations are used instead, so that tables and
feature classes can be saved in different locations.
If you want all data saved in the same location you can set the environment and remove the os.path.join calls from the loop below.

the gdbs used for the output locations must already exist, this notebook will not create them.
"""

#Update to your working gdb
#arcpy.env.workspace = r"X:\georisk\HaRIA_B_Wind\projects\acs\2. DATA\1. Work Unit Assessment\NWRA\impact\test\test.gdb"

#Location of specified folder (containing folder structure for all hazimp output)
data_loc = r"X:\georisk\HaRIA_B_Wind\projects\acs\2. DATA\1. Work Unit Assessment\NWRA\impact\AEP\20231206_run_1\ACT"
#Location of feature class or shapefile of the aggregate boundaries matching the aggregation used in HazImp
agg_boundary_loc = r"X:\georisk\HaRIA_B_Wind\projects\acs\2. DATA\1. Work Unit Assessment\NWRA\NWRA.gdb\extents\SA1_2021_AUST"
#Location where the output will be saved (change this to your working gdb)
table_out_path = r"X:\georisk\HaRIA_B_Wind\projects\acs\2. DATA\1. Work Unit Assessment\NWRA\impact\test\test2.gdb"
feature_out_path = r"X:\georisk\HaRIA_B_Wind\projects\acs\2. DATA\1. Work Unit Assessment\NWRA\impact\test\test.gdb"

#Check the configured locations before any processing starts.
#os.walk silently yields nothing for a folder that does not exist, so without this check a mistyped
#data_loc would let the notebook finish without producing (or reporting) anything.
if not os.path.isdir(data_loc):
    raise FileNotFoundError('HazImp output folder not found: {}'.format(data_loc))
if not arcpy.Exists(agg_boundary_loc):
    raise FileNotFoundError('Aggregate boundary dataset not found: {}'.format(agg_boundary_loc))
#The output gdbs are never created by this notebook, so they must already exist
for _out_gdb in (table_out_path, feature_out_path):
    if not arcpy.Exists(_out_gdb):
        raise FileNotFoundError('Output geodatabase not found (create it first): {}'.format(_out_gdb))

"""
the time and the spatial reference of the boundaries
"""

#Time string is appended to the name of each table and feature class created, so that a new run
#does not overwrite the output of an earlier run
#A spatial reference is needed when creating the feature classes, for simplicity we use the spatial reference from the boundary layer
#The geographic coordinate system is GDA2020
timestr = time.strftime("%Y%m%d_%H%M%S")
spatial_ref = arcpy.Describe(agg_boundary_loc).spatialReference

In [4]:
#Metadata templates

#Each {} is a placeholder that is filled by str.format() inside the loop.
#If the number or order of placeholders is changed here, the matching .format(...) call
#in the loop has to be changed as well.
#Tags, credits and access constraints are written once for the aggregated table and reused below.

#--- Aggregated table ---
agg_table_title = '{}' # table name, built in the loop
agg_table_tags = 'NWRA, ACS, risk, impact'
#placeholders: state name, recurrence interval in years
agg_table_summary = 'Raw tabulated {} SA1 aggregated loss values for a recurrence interval of {} years.'
#placeholders: state name, recurrence interval in years, location of the input .csv
agg_table_description = (
    'Raw, tabulated HazImp calculations of average structural loss ratio, total structural loss value, '
    'and other values for residential buildings in {} exposed to wind speeds with a recurrence interval of {} years, '
    'aggregated to SA1 code (2021). Raw HazImp data location {}'
)
agg_table_credits = 'Commonwealth of Australia (Geoscience Australia)'
agg_table_accessConstraints = (
    'Creative Commons Attribution 4.0 International Licence. '
    '<a href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</a>'
)

#--- Aggregated feature class ---
agg_feature_title = '{}' # feature class name, built in the loop
agg_feature_tags = agg_table_tags
#placeholders: state name, recurrence interval in years
agg_feature_summary = '{} SA1 aggregated loss values for a recurrence interval of {} years.'
#placeholders: state name, recurrence interval in years, location of the input .csv
agg_feature_description = (
    'HazImp calculations of average structural loss ratio for residential buildings '
    'in {} exposed to wind speeds with a recurrence interval of {} years, aggregated to SA1 code (2021). Spatialised '
    'using raw HazImp output from {}'
)
agg_feature_credits = agg_table_credits
agg_feature_accessConstraints = agg_table_accessConstraints

#--- Points table ---
points_table_title = '{}' # table name, built in the loop
points_table_tags = agg_table_tags
#placeholders: state name, recurrence interval in years
points_table_summary = 'Raw tabulated {} unit level loss values for a recurrence interval of {} years.'
#placeholders: state name, recurrence interval in years, location of the input .csv
points_table_description = (
    'Raw, tabulated HazImp calculations of structural loss ratio and structural loss value for '
    'individual residential buildings in {} exposed to wind speeds with a recurrence interval of {} years. '
    'Raw HazImp data location {}'
)
points_table_credits = agg_table_credits
points_table_accessConstraints = agg_table_accessConstraints

#--- Points feature class ---
points_feature_title = '{}' # feature class name, built in the loop
points_feature_tags = agg_table_tags
#placeholders: state name, recurrence interval in years
points_feature_summary = '{} unit level loss values for a recurrence interval of {} years.'
#placeholders: state name, recurrence interval in years, location of the input .csv
points_feature_description = (
    'HazImp calculations of structural loss ratio for residential buildings '
    'in {} exposed to wind speeds with a recurrence interval of {} years. Spatialised '
    'using raw HazImp output from {}'
)
points_feature_credits = agg_table_credits
points_feature_accessConstraints = agg_table_accessConstraints




In [5]:
"""
the loop
"""

#Walk through all of the files below the data location
#root is the folder currently being visited (data_loc itself first, then each folder below it)
#dirs is the list of sub-folders of root (not used directly, os.walk descends into them itself)
#files is the list of file names directly inside root

#Files ending in 'agg.csv' are processed as aggregated output, any other file ending in '.csv'
#is processed as point output, and every other file is ignored

#If using a different folder structure you may need to edit how the naming variables below are derived


def _write_metadata(target_path, title, tags, summary, description, credit_text, access_constraints):
    """Overwrite the metadata of an existing table or feature class.

    A new metadata object is filled with the supplied text and copied over the
    metadata of the dataset at target_path. Nothing is written when the
    target's metadata reports itself as read-only.

    target_path        -- full path of the table or feature class to update
    title              -- metadata title
    tags               -- metadata tags
    summary            -- metadata summary
    description        -- metadata description
    credit_text        -- metadata credits
    access_constraints -- metadata access constraints
    """
    new_md = md.Metadata()
    new_md.title = title
    new_md.tags = tags
    new_md.summary = summary
    new_md.description = description
    new_md.credits = credit_text
    new_md.accessConstraints = access_constraints

    target_md = md.Metadata(target_path)
    if not target_md.isReadOnly:
        target_md.copy(new_md)
        target_md.save()


for root, dirs, files in os.walk(data_loc):
    #Name of the folder containing the files, used in the output names and in the metadata text
    #It is the same for every file in root so it is worked out once per folder
    code = os.path.basename(os.path.normpath(root))

    #file_name is used (rather than reusing 'files') so the list being looped over is not overwritten
    for file_name in files:
        #'agg.csv' also ends with '.csv', so this one test skips every file that is not HazImp output
        if not file_name.endswith('.csv'):
            continue

        #Define some variables which will be used for naming files and in metadata
        #code2 is the file name without the 4 character '.csv' extension
        #code3 is code2 without its first two characters, used as the recurrence interval in the points metadata
        code2 = file_name[:-4]
        code3 = code2[2:]

        #For the aggregated HazImp output
        if file_name.endswith('agg.csv'):
            #Variables for the location of the file in use and for metadata
            hazimp_agg_loc = os.path.join(root, file_name)
            #code3 without its last four characters (presumably an '_agg' suffix - confirm against the file names)
            #Used as the recurrence interval in the aggregated metadata
            code4 = code3[:-4]

            #Define names and full output paths for the agg table and feature class
            agg_table_name = 'hazimp_' + code + '_' + code2 + '_table_' + timestr
            agg_feature_name = 'hazimp_' + code + '_' + code2 + '_SA1_' + timestr
            agg_table_path = os.path.join(table_out_path, agg_table_name)
            agg_feature_path = os.path.join(feature_out_path, agg_feature_name)

            #Convert csv to table in ArcGIS, create a copy of the aggregate boundaries to use for the join
            arcpy.conversion.TableToTable(in_rows = hazimp_agg_loc,
                                          out_path = table_out_path,
                                          out_name = agg_table_name)
            arcpy.conversion.ExportFeatures(in_features = agg_boundary_loc,
                                            out_features = agg_feature_path)

            #Add a field to the hazimp agg table and assign the SA1 CODE values
            #(SA1 in the hazimp output is a double but it is text in the boundaries feature class, they need to be the same in order for the join to work)
            arcpy.management.AddField(in_table = agg_table_path,
                                      field_name = 'SA1_CODE21',
                                      field_type = 'TEXT')
            arcpy.management.CalculateField(in_table = agg_table_path,
                                            field = 'SA1_CODE21',
                                            expression = '!SA1_CODE!',
                                            expression_type = 'PYTHON3')

            #Join the aggregated hazimp output table with the SA1 boundaries
            arcpy.management.JoinField(in_data = agg_feature_path,
                                       in_field = 'SA1_CODE21',
                                       join_table = agg_table_path,
                                       join_field = 'SA1_CODE21')

            #The copy of the SA1 boundaries we used is a national dataset but the data we are joining is not national
            #Use update cursor to delete rows (SA1s) which have null values
            #This will leave only the SA1s in the state which have residential building (and loss) data
            with arcpy.da.UpdateCursor(agg_feature_path, 'FID', 'FID is NULL') as cursor:
                for row in cursor:
                    cursor.deleteRow()

            #Overwrite the agg table metadata with the predefined text
            _write_metadata(target_path = agg_table_path,
                            title = agg_table_title.format(agg_table_name),
                            tags = agg_table_tags,
                            summary = agg_table_summary.format(code, code4),
                            description = agg_table_description.format(code, code4, hazimp_agg_loc),
                            credit_text = agg_table_credits,
                            access_constraints = agg_table_accessConstraints)

            #Overwrite the agg feature class metadata with the predefined text
            _write_metadata(target_path = agg_feature_path,
                            title = agg_feature_title.format(agg_feature_name),
                            tags = agg_feature_tags,
                            summary = agg_feature_summary.format(code, code4),
                            description = agg_feature_description.format(code, code4, hazimp_agg_loc),
                            credit_text = agg_feature_credits,
                            access_constraints = agg_feature_accessConstraints)

        #For point HazImp output (every remaining .csv file)
        else:
            #Variable for the location of the file in use
            hazimp_points_loc = os.path.join(root, file_name)

            #Define names and full output paths for the points table and feature class
            points_table_name = 'hazimp_' + code + '_' + code2 + '_points_table_' + timestr
            points_feature_name = 'hazimp_' + code + '_' + code2 + '_points_' + timestr
            points_table_path = os.path.join(table_out_path, points_table_name)
            points_feature_path = os.path.join(feature_out_path, points_feature_name)

            #Convert csv to table in ArcGIS
            arcpy.conversion.TableToTable(in_rows = hazimp_points_loc,
                                          out_path = table_out_path,
                                          out_name = points_table_name)

            #Spatialise the hazimp point output using the XY coordinates
            #The coordinate system is rebuilt from the factory code of the boundary layer's spatial reference
            arcpy.management.XYTableToPoint(in_table = points_table_path,
                                            out_feature_class = points_feature_path,
                                            x_field = 'exposure_longitude',
                                            y_field = 'exposure_latitude',
                                            coordinate_system = arcpy.SpatialReference(spatial_ref.factoryCode))

            #Overwrite the points table metadata with the predefined text
            _write_metadata(target_path = points_table_path,
                            title = points_table_title.format(points_table_name),
                            tags = points_table_tags,
                            summary = points_table_summary.format(code, code3),
                            description = points_table_description.format(code, code3, hazimp_points_loc),
                            credit_text = points_table_credits,
                            access_constraints = points_table_accessConstraints)

            #Overwrite the points feature class metadata with the predefined text
            _write_metadata(target_path = points_feature_path,
                            title = points_feature_title.format(points_feature_name),
                            tags = points_feature_tags,
                            summary = points_feature_summary.format(code, code3),
                            description = points_feature_description.format(code, code3, hazimp_points_loc),
                            credit_text = points_feature_credits,
                            access_constraints = points_feature_accessConstraints)