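This notebook demonstrates the conversion of AEM data from ASEG-GDF format to netCDF format, and then appends the rj-MCMC probability map (pmap) data stored in MATLAB files to the resulting netCDF file.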

Neil Symington
neil.symington@ga.gov.au

In [1]:
%matplotlib inline

from geophys_utils.netcdf_converter import aseg_gdf2netcdf_converter
from geophys_utils.netcdf_converter.aseg_gdf_utils import aseg_gdf_format2dtype
from scipy.io import loadmat
import netCDF4
import os, math
import numpy as np
import matplotlib.pyplot as plt
# Enable DEBUG-level logging (configured below) so that log messages are visible while debugging
import gc
import pandas as pd
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logging.debug("test")
import importlib

DEBUG:root:test
DEBUG:matplotlib.pyplot:Loaded backend module://ipykernel.pylab.backend_inline version unknown.


In [2]:
def extract_pmap_info(D, nchains=None):
    """
    Pull the probability-map fields out of a loaded MATLAB file.

    @param: D: python dictionary from loadmat function in scipy. It must hold
               an 'M' entry with the fields 'f', 'cp', 'lhist', 'nsample',
               'ndata' and 'conv' that are read below.
    @param: nchains: optional number of chains to read from 'conv'. When left
               as None it is taken from the second axis of the 'conv' struct
               array, so the function does not depend on any global variable.

    returns
    a more usable dictionary with the keys
        'conductivity_pmap' : the 'f' field, unchanged
        'change_point_pmap' : the 'cp' field, flattened
        'nlayer_bins'       : second entry of 'lhist', flattened
        'nlayer_prob'       : third entry of 'lhist', flattened
        'nsamples'          : the 'nsample' scalar
        'ndata'             : the 'ndata' scalar as a python int
        'misfit'            : array with one row of misfits per chain
        'sample_no'         : array with one row of sample numbers per chain
    """
    M = D['M']

    # Each field is unwrapped with [0,0]; this matches the (1, 1) arrays that
    # loadmat produces for MATLAB structs with its default settings
    freq = M['f'][0,0]

    # Get the changepoint histogram
    cp = M['cp'][0,0].flatten()

    # 'lhist' is itself a struct; its entries are addressed by position
    lhist = M['lhist'][0,0][0,0]
    laybins = lhist[1].flatten()
    lay_prob = lhist[2].flatten()

    nsample = M['nsample'][0,0][0,0]
    ndata = int(M['ndata'][0,0][0,0])

    # 'conv' is indexed as [0, chain], so its second axis counts the chains
    conv = M['conv'][0,0]
    if nchains is None:
        nchains = conv['misfit'].shape[1]

    misfit = [conv['misfit'][0,i].flatten() for i in range(int(nchains))]
    sample_no = [conv['sample'][0,i].flatten() for i in range(int(nchains))]

    return {'conductivity_pmap': freq, "change_point_pmap": cp, 'nlayer_bins': laybins,
            'nlayer_prob': lay_prob, 'nsamples': nsample, 'ndata': ndata,
            'misfit': np.array(misfit), 'sample_no': np.array(sample_no)}

In [3]:

# Folder that holds the inversion inputs and receives the netCDF output
root = r"C:\Users\PCUser\Desktop\EK_data\AEM\garjmcmcmtdem\combined"

# ASEG-GDF inputs: the data file and its definitions file
dat_in_path, dfn_in_path = (
    os.path.join(root, file_name) for file_name in ('rjmcmc.dat', 'rjmcmc.dfn')
)

# netCDF file written by the converter
nc_out_path = os.path.join(root, "EastKimberley_rjmcmc.nc")

# Coordinate reference system string handed to the converter
crs_string = "EPSG:28352"

In [15]:
# Remove any output left behind by an earlier run before converting again
if os.path.exists(nc_out_path):
    os.remove(nc_out_path)

# NOTE(review): the output recorded for this cell is
# "AssertionError: Invalid WKT or CRS name" - confirm that crs_string is
# accepted by the converter in the environment this is run in.
converter_options = dict(fix_precision=True, remove_null_columns=False)
d2n = aseg_gdf2netcdf_converter.ASEGGDF2NetCDFConverter(
    nc_out_path, dat_in_path, dfn_in_path, crs_string, **converter_options
)
d2n.convert2netcdf()

AssertionError: Invalid WKT or CRS name

In [5]:
# Create a python object with the EM dataset.
# The file is opened in "a" (append) mode so that the dimensions and variables
# created in the cells below are added to the converted file.
d = netCDF4.Dataset(nc_out_path, "a")

In [8]:
# Now we want to add the probability map data

# Open the csv that maps the fiducials to their pmap MATLAB files.
# It sits in the same folder as the other inputs, so its path is built from
# `root` instead of repeating the absolute folder.
df = pd.read_csv(os.path.join(root, "rjmcmc_map.csv"))

# Make sure the csv has as many rows as the netCDF file has fiducials
assert len(d['fiducial'][:]) == len(df), (
    "netCDF file holds {} fiducials but the csv holds {} rows".format(
        len(d['fiducial'][:]), len(df)))


In [24]:
# Show the fiducial-to-matfile lookup table read from the csv
df

Unnamed: 0,uniqueid,survey,date,flight,line,fiducial,easting,northing,elevation,altimeter,nchains,nsamples,nburnin,sampletime,misfit_lowest,misfit_average,ndepthcells,geometry,matfile
0,1,1294,20170719,20170719,100302,6825970.0,440967.4,8292342.5,9.09,0.0,32,100000,50000,13439.89,3.989116,47.273130,150,POINT (440967.4 8292342.5),C:\Users\PCUser\Desktop\EK_data\AEM\garjmcmcmt...
1,2,1294,20170718,20170719,101002,6823022.0,441695.9,8289542.5,5.55,0.0,32,100000,50000,12616.30,3.008351,96.028600,150,POINT (441695.9 8289542.5),C:\Users\PCUser\Desktop\EK_data\AEM\garjmcmcmt...
2,3,1294,20170718,20170719,101002,6823025.0,441776.7,8289542.0,6.14,0.0,32,100000,50000,12783.62,2.713839,91.314850,150,POINT (441776.7 8289542),C:\Users\PCUser\Desktop\EK_data\AEM\garjmcmcmt...
3,4,1294,20170718,20170718,101502,6760423.0,442418.1,8287537.5,7.82,0.0,32,100000,50000,12059.15,11.444180,12.715830,150,POINT (442418.1 8287537.5),C:\Users\PCUser\Desktop\EK_data\AEM\garjmcmcmt...
4,5,1294,20170718,20170718,101502,6760634.0,436747.2,8287542.5,6.30,0.0,32,100000,50000,18162.25,0.407195,0.842949,150,POINT (436747.2 8287542.5),C:\Users\PCUser\Desktop\EK_data\AEM\garjmcmcmt...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6443,85180,1294,20170705,20170706,320401,5700196.0,495597.8,8331542.0,12.57,0.0,32,100000,50000,16545.02,0.155445,1.451981,150,POINT (495597.8 8331542),C:\Users\PCUser\Desktop\EK_data\AEM\garjmcmcmt...
6444,85260,1294,20170705,20170706,320401,5700236.0,494675.1,8331845.5,14.97,0.0,32,100000,50000,16323.60,0.136792,0.977668,150,POINT (494675.1 8331845.5),C:\Users\PCUser\Desktop\EK_data\AEM\garjmcmcmt...
6445,85340,1294,20170705,20170706,320401,5700276.0,493750.6,8332166.0,13.50,0.0,32,100000,50000,16537.77,0.124921,0.500112,150,POINT (493750.6 8332166),C:\Users\PCUser\Desktop\EK_data\AEM\garjmcmcmt...
6446,85420,1294,20170705,20170706,320401,5700316.0,492781.6,8332491.0,10.31,0.0,32,100000,50000,17183.96,0.183248,0.489549,150,POINT (492781.6 8332491),C:\Users\PCUser\Desktop\EK_data\AEM\garjmcmcmt...


In [12]:
# Extract the pmap information from the matfile listed in the first row of
# the table; D_test is used below to inspect the array shapes
D_test = extract_pmap_info(loadmat(df['matfile'].iloc[0]))

In [27]:
# Column headers are matched with surrounding whitespace stripped, so the
# lookup works whether or not the csv header carries trailing spaces.
# (The variable name is misspelled but kept, because later cells use it.)
ndpethcells = int(df.rename(columns=str.strip)['ndepthcells'].iloc[0])
nsamples = len(df)

ncond_cells = 100
# Convert to a plain python int so the value is safe to use in range(),
# arithmetic and array shapes
nchains = int(d['nchains'][0])
# From the matlab files
nmisfit = 1019
min_layers = 1
max_layers = 25
condmin = 0.002
condmax = 3.


# The conductivity pmap of the test file must match the sizes chosen above
assert D_test['conductivity_pmap'].shape == (ndpethcells, ncond_cells), (
    "conductivity pmap has shape {}, expected {}".format(
        D_test['conductivity_pmap'].shape, (ndpethcells, ncond_cells)))

# TODO add assertions for nmisfit, min_layers/max_layers and condmin/condmax


In [28]:
# Check the shape of the conductivity pmap from the test file
D_test['conductivity_pmap'].shape

(150, 100)

In [29]:
# Now we create some arrays that will be added to the netCDF file as variables

# Values along the new dimensions: chain numbers 1..nchains, the layer-count
# bins and the conductivity cells (evenly spaced in log10)
misfit_count = np.arange(1, nchains + 1)
layer_bins = np.arange(min_layers, max_layers + 1)
cond_cells = np.linspace(np.log10(condmin), np.log10(condmax), ncond_cells)

# Zero-filled containers with one entry per row of the lookup table
pmap_conductivities = np.zeros(shape=(nsamples, ndpethcells, ncond_cells), dtype=np.int32)
maxlayer_counts = np.zeros(shape=(nsamples, layer_bins.shape[0]), dtype=np.int32)
misfit = np.zeros(shape=(nsamples, nchains, nmisfit), dtype=np.float64)
sample_no = np.zeros(shape=(nsamples, nchains, nmisfit), dtype=np.int32)
# np.int was only an alias of the builtin int and no longer exists in
# NumPy >= 1.24, so the builtin is used directly
ndata = np.zeros(shape=(nsamples), dtype=int)

In [30]:
# Now we need to populate the arrays

# We iterate through the fiducials of the netCDF file to ensure we have
# everything ordered correctly

# Read the lookup columns and the netCDF fiducials once, not on every pass.
# Column headers are matched with surrounding whitespace stripped.
pmap_table = df.rename(columns=str.strip)
fiducial_by_row = pmap_table['fiducial']
matfile_by_row = pmap_table['matfile']
nc_fiducials = d['fiducial'][:]

for i in range(nsamples):
    candidates = matfile_by_row[fiducial_by_row == nc_fiducials[i]]
    if candidates.empty:
        raise IndexError(
            "no pmap file listed for fiducial {}".format(nc_fiducials[i]))
    pmap_file = candidates.values[0]
    D = extract_pmap_info(loadmat(pmap_file))
    # Write data to arrays
    pmap_conductivities[i] = D['conductivity_pmap']
    maxlayer_counts[i] = D['nlayer_prob']
    misfit[i] = D['misfit']
    ndata[i] = D['ndata']
    # A single copy of the sample numbers is kept for the following cells,
    # so every point has to agree with the first one instead of silently
    # replacing it
    if i == 0:
        sample_no = D['sample_no']
    elif not np.array_equal(sample_no, D['sample_no']):
        raise ValueError(
            "sample numbers in {} differ from those of the first point".format(pmap_file))
    
    

In [31]:
# Each column of sample_no has to hold one single repeated value
for column in range(sample_no.shape[1]):
    distinct_values = np.unique(sample_no[:, column])
    assert distinct_values.shape[0] == 1

# The first row is therefore taken as the sample numbers
rj_number = sample_no[0]

In [32]:
# Register the two new dimensions; the objects returned by createDimension
# are not needed afterwards, so they are not kept
d.createDimension("conductivity_cells", len(cond_cells))
d.createDimension("nlayers_cells", len(layer_bins))

# Variables that hold the values along each of the new dimensions
cond_pmap_dim = d.createVariable("conductivity_cells", "f8", ("conductivity_cells",))
layer_pmap_dim = d.createVariable("nlayers_cells", "i4", ("nlayers_cells",))

# Fill
cond_pmap_dim[:] = cond_cells
layer_pmap_dim[:] = layer_bins

In [33]:
# Register the chain and rj-sample dimensions; the objects returned by
# createDimension are not needed afterwards, so they are not kept
d.createDimension("chain_dim", len(misfit_count))
d.createDimension("rj_sample_dim", misfit.shape[2])

# Variables that hold the values along each of the new dimensions
chain_dim = d.createVariable("chain_no", "i4", ("chain_dim",))
rjsample_dim = d.createVariable("rj_sample_number", "i4", ("rj_sample_dim",))

# Fill
chain_dim[:] = misfit_count
rjsample_dim[:] = rj_number

In [34]:
# Now add the remaining variables; all three are stored as 8-byte floats

# NOTE(review): "conducitivty_bin_count" looks like a misspelling of
# "conductivity"; it is left untouched because it is the name written to the file
cond_pmap = d.createVariable(
    "conducitivty_bin_count", "f8", ("point", "layer", "conductivity_cells")
)

layer_pmap = d.createVariable(
    "nlayer_bin_count", "f8", ("point", "nlayers_cells")
)

misfits = d.createVariable(
    "misfit", "f8", ("point", "chain_dim", "rj_sample_dim")
)


In [35]:
# Store n_data as a scalar variable when every point has the same value,
# otherwise as one value per point
distinct_ndata = np.unique(ndata)
same_everywhere = distinct_ndata.shape[0] == 1

# An empty dimensions tuple is createVariable's default and gives a scalar
ndat = d.createVariable('n_data', "i4", () if same_everywhere else ('point',))
ndat[:] = distinct_ndata[0] if same_everywhere else ndata

In [36]:
# Copy each in-memory array into its netCDF variable
for target, values in ((cond_pmap, pmap_conductivities),
                       (layer_pmap, maxlayer_counts),
                       (misfits, misfit)):
    target[:] = values

In [40]:
# Close the dataset that was opened in append mode
d.close()

In [38]:
# Re-open the file in "r" (read-only) mode to inspect the result
d = netCDF4.Dataset(nc_out_path, "r")

In [39]:
# Display the summary of the dataset
d

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    title: Dataset read from ASEG-GDF file rjmcmc.dat
    Conventions: CF-1.6,ACDD-1.3
    featureType: trajectory
    geospatial_vertical_min: 0.19
    geospatial_vertical_max: 151.42
    geospatial_vertical_units: m
    geospatial_vertical_resolution: point
    geospatial_vertical_positive: up
    history: Converted from ASEG-GDF file C:\Users\PCUser\Desktop\EK_data\AEM\garjmcmcmtdem\combined\rjmcmc.dat using definitions file C:\Users\PCUser\Desktop\EK_data\AEM\garjmcmcmtdem\combined\rjmcmc.dfn
    date_created: 2020-02-03T14:44:06.353928
    geospatial_east_resolution: point
    geospatial_north_resolution: point
    geospatial_east_min: 432931.3125
    geospatial_east_max: 551443.3125
    geospatial_east_units: m
    geospatial_north_min: 8250442.5
    geospatial_north_max: 8337973.0
    geospatial_north_units: m
    geospatial_bounds: POLYGON((488128.1875 8250442.5000, 480017.0000 8253309.0000, 4