In [1]:
%matplotlib inline

In [2]:
import os
import netCDF4
import numpy as np
import math
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
import gc
import re
from collections import OrderedDict
from geophys_utils import NetCDFGridUtils
from geophys_utils import NetCDFLineUtils
from geophys_utils import get_gdal_wcs_dataset, get_gdal_grid_values
from geophys_utils import get_spatial_ref_from_wkt, get_coordinate_transformation, get_utm_wkt, transform_coords
from geophys_utils._transect_utils import line_length, point_along_line, utm_coords, coords2distance, sample_transect

In [3]:
# Setup proxy as required: set the flag to True to send HTTP and HTTPS traffic via the proxy below
GA_STAFF_WIFI = False

if GA_STAFF_WIFI:
    for proxy_variable_name in ('http_proxy', 'https_proxy'):
        os.environ[proxy_variable_name] = 'http://proxy.inno.lan:3128'

In [4]:
# Use the local copy of the AEM dataset when it exists, otherwise fall back to the remote URL
local_aem_nc_path = '/g/data2/uc0/rr2_dev/rcb547/AEM_examples/AUS_10008_WestK_LCI.nc'
remote_aem_nc_url = 'http://dapds00.nci.org.au/thredds/dodsC/uc0/rr2_dev/rcb547/AEM_examples/AUS_10008_WestK_LCI.nc'
aem_nc_path = local_aem_nc_path if os.path.isfile(local_aem_nc_path) else remote_aem_nc_url

In [5]:
# The '#fillmismatch' suffix is a work-around for the bad _FillValue in the file:
# https://github.com/Unidata/netcdf-c/issues/1299
aem_nc_dataset = netCDF4.Dataset('{}#fillmismatch'.format(aem_nc_path))

In [6]:
# The CRS recorded in the test file is INCORRECT: it is defined in degrees, not metres.
crs_variable = aem_nc_dataset.variables['crs']
bad_spatial_ref = get_spatial_ref_from_wkt(crs_variable.epsg_code)
bad_wkt = bad_spatial_ref.ExportToWkt()
bad_wkt

'GEOGCS["GDA94",DATUM["Geocentric_Datum_of_Australia_1994",SPHEROID["GRS 1980",6378137,298.257222101,AUTHORITY["EPSG","7019"]],TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","6283"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4283"]]'

In [7]:
# Derive the WKT of the correct CRS; it is reused further down to populate the
# attributes of the netCDF transverse_mercator variable
area_of_interest_coordinate = (123.4, -18.01) # Coordinate in area of interest read from Google Earth
utm_wkt = get_utm_wkt(area_of_interest_coordinate, 'EPSG:4326')
utm_wkt

'PROJCS["UTM Zone 51, Southern Hemisphere",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",123],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",10000000],UNIT["Meter",1]]'

In [8]:
# Number of points = length of the first axis of the 'point' variable
point_variable = aem_nc_dataset.variables['point']
point_count = point_variable.shape[0]
point_count

206189

In [9]:
# Number of layers = length of the first axis of the 'layers' variable
layers_variable = aem_nc_dataset.variables['layers']
layer_count = layers_variable.shape[0]
layer_count

30

In [10]:
# Create array of 3D coordinate triples (easting, northing, layer top elevation) for all points
# np.nan is used throughout: the np.NaN alias no longer exists in NumPy 2.0
point_conductivity = aem_nc_dataset.variables['layer_conductivity_masked'][...].filled(np.nan)
print(point_conductivity.shape)

# Read each horizontal coordinate variable exactly once. Re-reading them for every layer
# would repeat the same (possibly remote) dataset access layer_count times.
easting = aem_nc_dataset.variables['easting'][...]
northing = aem_nc_dataset.variables['northing'][...]

coordinates = np.ones(shape=(point_count, layer_count, 3),
                      dtype=aem_nc_dataset.variables['easting'].dtype) * np.nan

# Broadcast the per-point x/y values across the layer axis; z is supplied per point and layer
coordinates[:, :, 0] = easting[:, np.newaxis]
coordinates[:, :, 1] = northing[:, np.newaxis]
coordinates[:, :, 2] = aem_nc_dataset.variables['layer_top_elevation'][...]
del easting, northing

print(coordinates.shape)

# Boolean indexing with the 2D (point, layer) mask both discards empty values and
# flattens the leading two axes; the result is already a fresh copy
good_data_mask = ~np.isnan(point_conductivity)
point_conductivity = point_conductivity[good_data_mask]
coordinates = coordinates[good_data_mask, :]
del good_data_mask
gc.collect()

print(point_conductivity.shape)
print(coordinates.shape)

(206189, 30)
(206189, 30, 3)
(5119594,)
(5119594, 3)


In [11]:
# Compute overall x, y & z ranges of overall volume, rounded outwards to multiples of 10.
# The extremes are found with vectorised NumPy reductions: the Python builtins min()/max()
# iterate element by element over millions of values, and their result depends on element
# order when NaNs are present. nanmin/nanmax ignore NaNs.
range_rounding = 10.0
axis_minima = np.nanmin(coordinates, axis=0).astype(np.float64)
axis_maxima = np.nanmax(coordinates, axis=0).astype(np.float64)

# One row per axis (x, y, z), columns are (minimum, maximum)
ranges = np.stack((np.floor(axis_minima / range_rounding) * range_rounding,
                   np.ceil(axis_maxima / range_rounding) * range_rounding),
                  axis=1)
print(ranges)

[[ 5.54800e+05  8.01350e+05]
 [ 7.94201e+06  8.13143e+06]
 [-5.10000e+02  2.50000e+02]]


In [12]:
# Centre of the overall volume: midpoint of the (minimum, maximum) pair of each axis
centres = (ranges[:, 0] + ranges[:, 1]) / 2.0
print(centres)

[ 6.78075e+05  8.03672e+06 -1.30000e+02]


In [13]:
# Grid ranges for the area of interest: a 10km x 10km square centred on the overall
# centre in x & y, and the full overall range in z
xysize = 10000.0
half_xysize = xysize / 2.0
x_centre, y_centre = centres[0], centres[1]
grid_ranges = np.array([
    [x_centre - half_xysize, x_centre + half_xysize],
    [y_centre - half_xysize, y_centre + half_xysize],
    [ranges[2, 0], ranges[2, 1]],
])
grid_ranges

array([[ 6.73075e+05,  6.83075e+05],
       [ 8.03172e+06,  8.04172e+06],
       [-5.10000e+02,  2.50000e+02]])

In [14]:
# Build a boolean mask that is True only for points inside the x/y area of interest.
# The remaining count is printed after each bound is applied.
spatial_mask = np.ones(shape=(coordinates.shape[0],), dtype=bool)
print(np.count_nonzero(spatial_mask))
for axis_index in (0, 1):
    lower_bound, upper_bound = grid_ranges[axis_index]
    axis_values = coordinates[:, axis_index]
    # Negated strict comparisons (rather than >= / <=) keep any NaN coordinate selected
    spatial_mask &= ~(axis_values < lower_bound)
    print(np.count_nonzero(spatial_mask))
    spatial_mask &= ~(axis_values > upper_bound)
    print(np.count_nonzero(spatial_mask))

5119594
1610920
155609
112059
12042


In [15]:
# Output grid resolution
# xyres: 100m/pixel horizontally; zres: 10m/pixel vertically
xyres, zres = 100.0, 10.0

In [16]:
# Replace the z range with the extent of the points inside the area of interest,
# rounded down/up to the nearest multiple of zres. The x & y ranges are kept as they are.
z_in_area = coordinates[spatial_mask][:, 2]
z_lower = math.floor(min(z_in_area) / zres) * zres
z_upper = math.ceil(max(z_in_area) / zres) * zres
grid_ranges = np.array((grid_ranges[0], grid_ranges[1], (z_lower, z_upper)))
grid_ranges

array([[ 6.73075e+05,  6.83075e+05],
       [ 8.03172e+06,  8.04172e+06],
       [-1.70000e+02,  1.10000e+02]])

In [17]:
# Compute regular coordinate grids for resampling
resampling_method = 'linear'

# Half a step is added to each upper bound so that the upper bound itself is generated
x_slice = slice(grid_ranges[0][0], grid_ranges[0][1] + xyres / 2.0, xyres)
y_slice = slice(grid_ranges[1][0], grid_ranges[1][1] + xyres / 2.0, xyres)
z_slice = slice(grid_ranges[2][0], grid_ranges[2][1] + zres / 2.0, zres)

# Tuple of three 3D arrays holding the x, y and z coordinate of every grid node
grids = tuple(np.mgrid[x_slice, y_slice, z_slice])

#print(grids)

In [18]:
# Interpolate the scattered conductivity values inside the area of interest onto the regular 3D grid
# This can take a little while
points_in_area = coordinates[spatial_mask]
conductivity_in_area = point_conductivity[spatial_mask]
conductivity_grid = griddata(points_in_area, conductivity_in_area, grids, method=resampling_method)
#conductivity_grid

In [19]:
# Determine all X values with data
#x_list = sorted(list(set(np.where(~np.isnan(conductivity_grid))[0])))
#y_list = sorted(list(set(np.where(~np.isnan(conductivity_grid))[1])))
#z_list = sorted(list(set(np.where(~np.isnan(conductivity_grid))[2])))

# Plot yz slices with log colour stretch
#for x in x_list:
#    plt.figure(figsize=(30,20))    
#    plt.imshow(np.log(np.transpose(conductivity_grid[x,:,::-1])), cmap='Spectral_r')

In [20]:
# Determine slicing to exclude no-data areas around edges
data_mask = ~np.isnan(conductivity_grid)

# Locate the valid cells once: np.where returns one index array per grid axis
data_indices = np.where(data_mask)
if data_indices[0].size == 0:
    # Fail with a clear message instead of "min() arg is an empty sequence"
    raise ValueError('conductivity_grid contains no valid data - check area of interest and resampling')

# For each axis, the slice spans the first to the last index holding valid data (inclusive)
data_slices = [slice(axis_indices.min(), axis_indices.max() + 1)
               for axis_indices in data_indices
              ]
data_slices

[slice(13, 100, None), slice(1, 100, None), slice(1, 28, None)]

In [21]:
# Set up dimension arrays for netCDF: the 1D coordinate values along each grid axis,
# trimmed to the data extent and stored in z, y, x order
x_values = grids[0][:, 0, 0]
y_values = grids[1][0, :, 0]
z_values = grids[2][0, 0, :]
dimensions = OrderedDict([
    ('z', z_values[data_slices[2]]),
    ('y', y_values[data_slices[1]]),
    ('x', x_values[data_slices[0]]),
])
dimensions

OrderedDict([('z',
              array([-160., -150., -140., -130., -120., -110., -100.,  -90.,  -80.,
                      -70.,  -60.,  -50.,  -40.,  -30.,  -20.,  -10.,    0.,   10.,
                       20.,   30.,   40.,   50.,   60.,   70.,   80.,   90.,  100.])),
             ('y',
              array([8031820., 8031920., 8032020., 8032120., 8032220., 8032320.,
                     8032420., 8032520., 8032620., 8032720., 8032820., 8032920.,
                     8033020., 8033120., 8033220., 8033320., 8033420., 8033520.,
                     8033620., 8033720., 8033820., 8033920., 8034020., 8034120.,
                     8034220., 8034320., 8034420., 8034520., 8034620., 8034720.,
                     8034820., 8034920., 8035020., 8035120., 8035220., 8035320.,
                     8035420., 8035520., 8035620., 8035720., 8035820., 8035920.,
                     8036020., 8036120., 8036220., 8036320., 8036420., 8036520.,
                     8036620., 8036720., 8036820., 8036920.

In [22]:
# Create new NetCDF file (overwriting any existing one) in the same format as the input dataset
nc_out_path = './conductivity_grid.nc'
output_file_format = aem_nc_dataset.file_format
nc_output_dataset = netCDF4.Dataset(nc_out_path, mode="w", clobber=True, format=output_file_format)

In [23]:
# For every entry in dimensions, create a netCDF dimension of matching length plus a
# same-named 1D variable holding its coordinate values
for dimension_name in dimensions:
    dimension_values = dimensions[dimension_name]
    nc_output_dataset.createDimension(dimname=dimension_name, size=len(dimension_values))

    dimension_variable = nc_output_dataset.createVariable(
        dimension_name,
        dimension_values.dtype,
        (dimension_name,),
    )
    dimension_variable[...] = dimension_values

In [24]:
# Create and populate data variable
# The fill value and units are taken from the source conductivity variable
source_conductivity_variable = aem_nc_dataset.variables['layer_conductivity_masked']
fill_value = source_conductivity_variable._FillValue
units = source_conductivity_variable.units

conductivity_variable = nc_output_dataset.createVariable('conductivity',
                                                         conductivity_grid.dtype,
                                                         list(dimensions.keys()),
                                                         fill_value=fill_value
                                                         )

# Index with a tuple of slices: indexing with a list of slices is deprecated in NumPy
# and raises an error in recent releases
trimmed_grid = conductivity_grid[tuple(data_slices)].transpose() # Reverse axis order for netCDF

# Substitute the no-data value BEFORE writing. Assigning into conductivity_variable[...]
# after the write only modifies a temporary in-memory copy, so the NaNs would stay in the file.
# np.where builds a new array, so conductivity_grid itself is left untouched.
conductivity_variable[...] = np.where(np.isnan(trimmed_grid),
                                      conductivity_variable._FillValue,
                                      trimmed_grid)
conductivity_variable.units = units
conductivity_variable.grid_mapping = "transverse_mercator"

  # This is added back by InteractiveShellApp.init_path()


In [25]:
# Set up GeoTransform
# Example: transverse_mercator:GeoTransform = "628000 1 0 6849000 0 -1 " ;
# The origin is moved half a pixel from the first x/y coordinate value, i.e. from the
# pixel centre to the pixel edge.
# NOTE(review): the y origin is taken from the first (lowest) y value and the y pixel size
# is positive, unlike the example above - confirm this is what downstream readers expect.
half_pixel = xyres / 2
x_origin = dimensions['x'][0] - half_pixel
y_origin = dimensions['y'][0] - half_pixel
GeoTransform = [x_origin, xyres, 0, y_origin, 0, xyres]
GeoTransform

[674325.0, 100.0, 0, 8031770.0, 0, 100.0]

In [26]:
# Extract values from WKT and create transverse_mercator (crs) variable
# There has to be a better way to do this!
def _wkt_floats(pattern):
    """Search utm_wkt for pattern and return its capture groups as a tuple of floats.

    Raises ValueError when the pattern is not found, so that a WKT without the expected
    element is reported clearly instead of failing with an AttributeError on None.
    """
    match = re.search(pattern, utm_wkt)
    if match is None:
        raise ValueError('Pattern {!r} not found in WKT: {}'.format(pattern, utm_wkt))
    return tuple(float(group) for group in match.groups())


# All patterns are raw strings: "\[" and "\]" are invalid escape sequences in ordinary
# string literals and trigger a warning in current Python versions
transverse_mercator_values = {}
(transverse_mercator_values['semi_major_axis'],
 transverse_mercator_values['inverse_flattening']) = _wkt_floats(r'SPHEROID\["WGS 84",([^,]+),([^,]+),')

# Attribute name -> pattern capturing its single numeric value, in output attribute order
single_value_patterns = (
    ('latitude_of_projection_origin', r'PARAMETER\["latitude_of_origin",([^\]]+)\]'),
    ('scale_factor_at_central_meridian', r'PARAMETER\["scale_factor",([^\]]+)\]'),
    ('longitude_of_central_meridian', r'PARAMETER\["central_meridian",([^\]]+)\]'),
    ('false_northing', r'PARAMETER\["false_northing",([^\]]+)\]'),
    ('false_easting', r'PARAMETER\["false_easting",([^\]]+)\]'),
    ('longitude_of_prime_meridian', r'PRIMEM\["Greenwich",([^,]+),'),
)
for attribute_name, attribute_pattern in single_value_patterns:
    transverse_mercator_values[attribute_name] = _wkt_floats(attribute_pattern)[0]

transverse_mercator_values['grid_mapping_name'] = 'transverse_mercator'
transverse_mercator_values['spatial_ref'] = utm_wkt
# GeoTransform is stored as a single space-separated string
transverse_mercator_values['GeoTransform'] = ' '.join([str(value) for value in GeoTransform])

# Scalar (dimensionless) 1-byte integer variable that only serves as a container for the attributes
transverse_mercator_variable = nc_output_dataset.createVariable('transverse_mercator',
                                                                'i1',
                                                                ()
                                                                )

transverse_mercator_variable.setncatts(transverse_mercator_values)

In [27]:
# Check variable sizes & attributes
# Displays every variable created in the output dataset so far (z, y, x, conductivity, ...)
# so that shapes, fill values and attributes can be inspected before the file is closed
nc_output_dataset.variables

OrderedDict([('z', <class 'netCDF4._netCDF4.Variable'>
              float64 z(z)
              unlimited dimensions: 
              current shape = (27,)
              filling on, default _FillValue of 9.969209968386869e+36 used),
             ('y', <class 'netCDF4._netCDF4.Variable'>
              float64 y(y)
              unlimited dimensions: 
              current shape = (99,)
              filling on, default _FillValue of 9.969209968386869e+36 used),
             ('x', <class 'netCDF4._netCDF4.Variable'>
              float64 x(x)
              unlimited dimensions: 
              current shape = (87,)
              filling on, default _FillValue of 9.969209968386869e+36 used),
             ('conductivity', <class 'netCDF4._netCDF4.Variable'>
              float64 conductivity(z, y, x)
                  _FillValue: 9.969209968386869e+36
                  units: mS/m
                  grid_mapping: transverse_mercator
              unlimited dimensions: 
              current s

In [28]:
# Output netCDF
# Closing the dataset finalises the output file at nc_out_path
nc_output_dataset.close()