This notebook takes an AEM inversion text file from the Aarhus Workbench and writes a netCDF file (see https://github.com/GeoscienceAustralia/geophys_utils)



In [1]:
from hydrogeol_utils import AEM_utils
import pandas as pd
import numpy as np
import math
import yaml
import importlib

In [2]:
#infile = r"C:\Users\PCUser\Desktop\NSC_data\data\AEM\HE\WB_Exported_Inversions\HE_WB_SCI_Smooth_v2_ks_inv.xyz"
infile = r"C:\Users\PCUser\Desktop\EK_data\AEM\KR_WBsci_smooth03\Smooth_03_MOD_inv.xyz"
# Now opent the file and delete the pipe

with open(infile, 'r') as inf:
    s = inf.read()

new_s = s.replace(';',',')


# Reomve the final
if new_s[-1:] == '\n':
    new_s = new_s[:-1]

new_file = r"C:\Users\PCUser\Desktop\EK_data\AEM\KR_WBsci_smooth03\KR_WBsci_smooth03_.xyz"

with open(new_file, 'w') as f:
    f.write(new_s)

AEM_data = AEM_utils.parse_wb_file(new_file)

In [3]:
# Create a dataframe for ease of use

df = pd.DataFrame(data = AEM_data['data'], columns = AEM_data['header'])

In [6]:
df['SIGMA_I_1']

0          13.60360
1           8.21018
2          65.87620
3           8.24402
4          10.09290
            ...    
148995     63.01200
148996     38.86510
148997     70.72140
148998    158.55400
148999    570.45100
Name: SIGMA_I_1, Length: 149000, dtype: float64

In [7]:
# Lets do some changing of the data to suit our needs

# Convert the conductivity to S/m

condcols = [s for s in df.columns if s.startswith('SIGMA_')]

df.at[:, condcols] = df.loc[:,condcols] / 1000.

In [8]:
# Replcae the nulls with -9999
null = float(AEM_data['DUMMY'])

df.replace(null, -9999., inplace=True)

df['NLAYERS'] = 30

In [9]:
# Now lets output the data into a fixed width delimited file. We choose the columns to output
# and the format to match a dfn from a different survey

export_cols = ['FID', 'LINE_NO', 'UTMX', 'UTMY', 'ELEVATION', 'RESDATA',
              'ALT', 'INVALT', 'DOI_STANDARD', 'NLAYERS']

for item in df.columns:
    if item.startswith('RHO'):
        export_cols.append(item)

for item in df.columns:
    if item.startswith('DEP_TOP'):
        export_cols.append(item)

df_exp = df[export_cols]


In [10]:
df_exp['NLAYERS']

0         30
1         30
2         30
3         30
4         30
          ..
148995    30
148996    30
148997    30
148998    30
148999    30
Name: NLAYERS, Length: 149000, dtype: int64

In [11]:
# Now we replace the columns with formatted strings

df_exp.at[:,'FID'] = ['{:6d}'.format(x) for x in df_exp['FID'].values.astype(int)]
df_exp.at[:,'LINE_NO'] = ['{:9d}'.format(x) for x in df_exp['LINE_NO'].values.astype(int)]
df_exp.at[:,'UTMX'] = ['{:10.1F}'.format(x) for x in df_exp['UTMX'].values]
df_exp.at[:,'UTMY'] = ['{:10.1F}'.format(x) for x in df_exp['UTMY'].values]
df_exp.at[:,'ELEVATION'] = ['{:8.2F}'.format(x) for x in df_exp['ELEVATION'].values]
df_exp.at[:,'RESDATA'] = ['{:7.3F}'.format(x) for x in df_exp['RESDATA'].values]
df_exp.at[:,'ALT'] = ['{:7.2F}'.format(x) for x in df_exp['ALT'].values]
df_exp.at[:,'INVALT'] = ['{:7.2F}'.format(x) for x in df_exp['INVALT'].values]
df_exp.at[:,'DOI_STANDARD'] = ['{:7.2F}'.format(x) for x in df_exp['DOI_STANDARD'].values]
df_exp.at[:,'NLAYERS'] = ['{:6d}'.format(x) for x in df_exp['NLAYERS'].values]

# Iterate through the resisitivities

for item in df_exp.columns:
    if item.startswith('RHO'):
        df_exp.at[:,item] = ['{:9.3F}'.format(x) for x in df_exp[item].values]
    elif item.startswith('DEP'):
        df_exp.at[:,item] = ['{:9.3F}'.format(x) for x in df_exp[item].values]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [12]:
# Now we output the data
#outfile = r"C:\Users\PCUser\Desktop\NSC_data\data\AEM\HE\nc\HowardsEast_wb_inversion_temp.dat"
outfile = r"C:\Users\PCUser\Desktop\EK_data\AEM\netCDF\EastKimberley_wb_inversion_temp.dat"


# Note use a pipe so we can easily delete later
df_exp.to_csv(outfile, sep = '|', index = False, header = False)

# Now opent the file and delete the pipe

with open(outfile, 'r') as inf:
    s = inf.read()

new_s = s.replace('|','')


# Reomve the final
if new_s[-1:] == '\n':
    new_s = new_s[:-1]

new_outfile = r"C:\Users\PCUser\Desktop\EK_data\AEM\netCDF\EastKimberley_wb_inversion.dat"

with open(new_outfile, 'w') as f:
    f.write(new_s)


In [2]:
%matplotlib inline

from geophys_utils.netcdf_converter import aseg_gdf2netcdf_converter
from geophys_utils.netcdf_converter.aseg_gdf_utils import aseg_gdf_format2dtype
from hydrogeol_utils import AEM_utils
import netCDF4
import os, math
import numpy as np
import matplotlib.pyplot as plt
# SO we can see the logging. This enables us to debug
import gc
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logging.debug("test")


DEBUG:root:test
DEBUG:matplotlib.pyplot:Loaded backend module://ipykernel.pylab.backend_inline version unknown.


In [3]:
root = r"C:\Users\PCUser\Desktop\EK_data\AEM\KR_WBsci_smooth03"

nc_out_path = r"C:\Users\PCUser\Desktop\EK_data\AEM\netCDF\EastKimberley_wb_inversion.nc"

dat_in_path = os.path.join(root, "EastKimberley_wb_inversion.dat")

dfn_in_path = os.path.join(root, 'EastKimberley_wb_inversion.dfn')

crs_string = "EPSG:28352"

In [15]:
if os.path.exists(nc_out_path):
    os.remove(nc_out_path)

d2n = aseg_gdf2netcdf_converter.ASEGGDF2NetCDFConverter(nc_out_path, 
                                                 dat_in_path, 
                                                 dfn_in_path,
                                                 crs_string,
                                                 fix_precision=True,
                                                 remove_null_columns = False)
d2n.convert2netcdf()    

INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:Reading definitions file C:\Users\PCUser\Desktop\EK_data\AEM\KR_WBsci_smooth03\EastKimberley_wb_inversion.dfn
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:Reading data file C:\Users\PCUser\Desktop\EK_data\AEM\KR_WBsci_smooth03\EastKimberley_wb_inversion.dat
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:10000 lines read
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:20000 lines read
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:30000 lines read
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:40000 lines read
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:50000 lines read
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:60000 lines read
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:70000 lines read
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:80000 lines read
INFO:geophys_utils.netcdf_conve

point 149000
layer 30


INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:	Writing comments lookup variables
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:	Creating dimension for comments
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:		Writing 1492 comments lookup values to array variable comments
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:		Writing comments lookup indices to array variable comments_index
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:	Writing 1D int8 variable sequence
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:	Writing line lookup variables
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:	Creating dimension for line
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:		Writing 352 line lookup values to array variable line
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:		Writing line lookup indices to array variable line_index
INFO:geophys_utils.netcdf_conver

In [24]:
d = netCDF4.Dataset(r"C:\Users\PCUser\Desktop\NSC_data\data\AEM\HE\nc\HowardsEast_wb_inversion.nc", 'r')

In [25]:
d

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    geospatial_east_min: 718050.3125
    geospatial_east_max: 741266.3125
    geospatial_east_units: m
    geospatial_north_min: 8612255.0
    geospatial_north_max: 8631394.0
    geospatial_north_units: m
    title: Dataset read from ASEG-GDF file HowardsEast_wb_inversion.dat
    Conventions: CF-1.6,ACDD-1.3
    featureType: trajectory
    geospatial_vertical_min: -11.9
    geospatial_vertical_max: 77.1
    geospatial_vertical_units: m
    geospatial_vertical_resolution: point
    geospatial_vertical_positive: up
    history: Converted from ASEG-GDF file C:\Users\PCUser\Desktop\NSC_data\data\AEM\HE\nc\HowardsEast_wb_inversion.dat using definitions file C:\Users\PCUser\Desktop\NSC_data\data\AEM\HE\nc\HowardsEast_wb_inversion.dfn
    date_created: 2019-09-25T19:45:34.029179
    geospatial_east_resolution: point
    geospatial_north_resolution: point
    geospatial_bounds: POLYGON((131.1328 -12.5449, 131

In [26]:
import time
from hydrogeol_utils import spatial_functions
from geophys_utils._netcdf_point_utils import NetCDFPointUtils

In [27]:
cond_point_utils = NetCDFPointUtils(d)

In [28]:
minx = d.geospatial_east_min
maxx = d.geospatial_east_max
miny = d.geospatial_north_min
maxy = d.geospatial_north_max


In [29]:
minx

718050.3125

In [30]:
# Define gdal algorithm as string - see https://gdal.org/programs/gdal_grid.html
algorithm = 'invdist:power=2:radius1=250:radius2=250:max_points=10:'
algorithm += 'min_points=2:nodata=-999.999'

grid_kwargs = {'conductivity': {'log_grid': True,
                                'gdal_algorithm': algorithm}}

utm_wkt = 'PROJCS["GDA94 / MGA zone 52",GEOGCS["GDA94",DATUM["Geocentric_Datum_of_Australia_1994",SPHEROID["GRS 1980",6378137,298.257222101,AUTHORITY["EPSG","7019"]],TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","6283"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.01745329251994328,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4283"]],UNIT["metre",1,AUTHORITY["EPSG","9001"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",129],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",10000000],AUTHORITY["EPSG","28352"],AXIS["Easting",EAST],AXIS["Northing",NORTH]]'

start_time = time.time()

# Currently this is too resource intense, need to make it more effecient
aem_grid = spatial_functions.grid_points_gdal(cond_point_utils,
                 grid_resolution = 100,
                 variables = 'conductivity',
                 reprojected_grid_bounds = (minx, miny, maxx, maxy),
                 grid_wkt = utm_wkt,
                 point_step=1, # Only use every 4th point
                 grid_kwargs = grid_kwargs,
                 depth_inds = np.arange(0,30))

print("Gridding time: ", time.time() - start_time, ' seconds')

Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finished gridding  temp_grid.tif
Gridding  temp_grid.tif
Finishe

In [31]:
import affine
aff = affine.Affine.from_gdal(*aem_grid['geotransform'])

In [32]:
arr= aem_grid['conductivity']

arr[np.isnan(arr)] = null

In [36]:
# Write into a raster

import rasterio

null = -999.99

outfile = r"C:\Users\PCUser\Desktop\NSC_data\data\AEM\HE\grids\EK_wb_layer_grids_logCond.tif"

new_dataset = rasterio.open(outfile, 'w', driver='GTiff',
                                height=aem_grid['conductivity'].shape[1], 
                                width=aem_grid['conductivity'].shape[2],
                                count=aem_grid['conductivity'].shape[0],
                                dtype=aem_grid['conductivity'].dtype,
                                crs="EPSG:28352",
                                transform=aff,
                                nodata = null
                           )

for i in range(30):
    new_dataset.write(np.log10(aem_grid['conductivity'][i]), i+1)


DEBUG:rasterio.env:Entering env context: <rasterio.env.Env object at 0x0000029D03E2E348>
DEBUG:rasterio.env:Starting outermost env
DEBUG:rasterio.env:No GDAL environment exists
DEBUG:rasterio.env:New GDAL environment <rasterio._env.GDALEnv object at 0x0000029D0288C388> created
DEBUG:rasterio._env:GDAL_DATA found in environment: 'C:\\Users\\PCUser\\Anaconda3\\envs\\hydrogeol_utils\\Library\\share\\gdal'.
DEBUG:rasterio._env:PROJ_LIB found in environment: 'C:\\Users\\PCUser\\Anaconda3\\envs\\hydrogeol_utils\\Library\\share'.
DEBUG:rasterio._env:Started GDALEnv <rasterio._env.GDALEnv object at 0x0000029D0288C388>.
DEBUG:rasterio.env:Got a copy of environment <rasterio._env.GDALEnv object at 0x0000029D0288C388> options
DEBUG:rasterio.env:Entered env context: <rasterio.env.Env object at 0x0000029D03E2E348>
DEBUG:rasterio._io:Path: UnparsedPath(path='C:\\Users\\PCUser\\Desktop\\NSC_data\\data\\AEM\\HE\\grids\\EK_wb_layer_grids_logCond.tif'), mode: w, driver: GTiff
DEBUG:rasterio._io:Skipped 

In [37]:
new_dataset.close()


DEBUG:rasterio._io:Dataset <closed DatasetWriter name='C:\Users\PCUser\Desktop\NSC_data\data\AEM\HE\grids\EK_wb_layer_grids_logCond.tif' mode='w'> has been stopped.
