This notebook demonstrates the conversion of AEM data from ASEG-GDF format to netCDF format, and then adds probability map (pmap) data read from MATLAB files to the resulting netCDF file.

Neil Symington
neil.symington@ga.gov.au

In [1]:
%matplotlib inline

from geophys_utils.netcdf_converter import aseg_gdf2netcdf_converter
from geophys_utils.netcdf_converter.aseg_gdf_utils import aseg_gdf_format2dtype
from scipy.io import loadmat
import netCDF4
import os, math
import numpy as np
import matplotlib.pyplot as plt
import gc
import pandas as pd
import logging
# Set the root logger to DEBUG so that log messages show up in the notebook output.
# This enables us to debug; the "test" message confirms the logger is active.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logging.debug("test")
import importlib

DEBUG:root:test


In [2]:
def extract_pmap_info(D):
    """
    Pull the probability map (pmap) entries out of a loaded MATLAB file.

    @param: D: python dictionary from loadmat function in scipy. Everything
               is read from the struct stored under the key 'M'.

    returns
    a more usable dictionary with the keys
        'conductivity_pmap': M.f exactly as stored in the file
        'change_point_pmap': M.cp flattened to 1D
        'nlayer_bins':       second entry of M.lhist flattened to 1D
        'nlayer_prob':       third entry of M.lhist flattened to 1D
        'nsamples':          M.nsample
        'ndata':             M.ndata as a python int
        'misfit':            M.conv.misfit, one row per chain
        'sample_no':         M.conv.sample, one row per chain
    """
    M = D['M']

    freq = M['f'][0,0]

    # Get the changepoint histogram
    cp = M['cp'][0,0].flatten()

    lhist = M['lhist'][0,0][0,0]
    laybins = lhist[1].flatten()
    lay_prob = lhist[2].flatten()

    nsample = M['nsample'][0,0][0,0]
    ndata = int(M['ndata'][0,0][0,0])

    conv = M['conv'][0,0]
    # The number of chains is taken from the file itself (the chains are
    # indexed as [0, i] below) so that the function does not depend on a
    # notebook-level variable being defined before it is called.
    nchains = conv['misfit'].shape[1]

    misfit = [conv['misfit'][0,i].flatten() for i in range(nchains)]
    sample_no = [conv['sample'][0,i].flatten() for i in range(nchains)]

    return {'conductivity_pmap': freq, "change_point_pmap": cp, 'nlayer_bins': laybins,
            'nlayer_prob': lay_prob, 'nsamples': nsample, 'ndata': ndata,
            'misfit': np.array(misfit), 'sample_no': np.array(sample_no)}

In [3]:

# Coordinate reference system passed to the converter
crs_string = "EPSG:28352"

# Directory holding the inputs and receiving the output.
# NOTE: this is an absolute path on the author's machine; edit it before running elsewhere.
root = r"C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\garjmcmctdem_2\combined"

# Inputs: the ASEG-GDF data file and its definitions file
dat_in_path = os.path.join(root, 'rjmcmc.dat')
dfn_in_path = os.path.join(root, 'rjmcmc.dfn')

# Output: the netCDF file to be written
nc_out_path = os.path.join(root, "DalyRiver_rjmcmc.nc")

In [4]:
# Remove any output left over from a previous run so the file is written afresh
if os.path.exists(nc_out_path):
    os.remove(nc_out_path)

# Set up the converter with the output path, the two ASEG-GDF inputs and the CRS,
# then run the conversion
d2n = aseg_gdf2netcdf_converter.ASEGGDF2NetCDFConverter(
    nc_out_path,
    dat_in_path,
    dfn_in_path,
    crs_string,
    fix_precision=True,
    remove_null_columns=False,
)
d2n.convert2netcdf()

INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:Reading definitions file C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\garjmcmctdem_2\combined\rjmcmc.dfn
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:Reading data file C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\garjmcmctdem_2\combined\rjmcmc.dat


[{'short_name': 'comments', 'format': 'A4', 'long_name': 'COMMENTS', 'dtype': '<U4', 'columns': 1, 'width_specifier': 4, 'decimal_places': 0, 'variable_attributes': {'aseg_gdf_format': 'A4'}}, {'short_name': 'uniqueid', 'format': 'I12', 'long_name': 'Inversion sequence number', 'dtype': 'int64', 'columns': 1, 'width_specifier': 12, 'decimal_places': 0, 'variable_attributes': {'aseg_gdf_format': 'I12'}}, {'short_name': 'survey', 'format': 'I12', 'long_name': 'Survey number', 'dtype': 'int64', 'columns': 1, 'width_specifier': 12, 'decimal_places': 0, 'variable_attributes': {'aseg_gdf_format': 'I12'}}, {'short_name': 'date', 'format': 'I12', 'long_name': 'Date number', 'dtype': 'int64', 'columns': 1, 'width_specifier': 12, 'decimal_places': 0, 'variable_attributes': {'aseg_gdf_format': 'I12'}}, {'short_name': 'flight', 'format': 'I12', 'long_name': 'Flight number', 'dtype': 'int64', 'columns': 1, 'width_specifier': 12, 'decimal_places': 0, 'variable_attributes': {'aseg_gdf_format': 'I12'}

INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:A total of 2309 points were read
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:Datatype for variable uniqueid changed from int64 to int16
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:Datatype for variable survey changed from int64 to int16
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:Datatype for variable date changed from int64 to int32
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:Datatype for variable flight changed from int64 to int32
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:Datatype for variable line changed from int64 to int32
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:Datatype for variable fiducial changed from float64 to float32
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:Datatype for variable northing changed from float64 to float32
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:

point 2309
layer 250


INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:	Writing 2D float32 variable conductivity_mean
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:	Writing 2D float32 variable conductivity_mode
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:	Writing 2D float32 variable conductivity_p50
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:	Writing 2D float32 variable conductivity_p10
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:	Writing 2D float32 variable conductivity_p90
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:	Writing 2D float32 variable conductivity__highestlikelihood
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:	Writing 2D float32 variable conductivity_lowestmisfit
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:	Writing 2D int16 variable change_point
INFO:geophys_utils.netcdf_converter.aseg_gdf2netcdf_converter:Creating crs, longitude and latitude variables for unp

In [6]:

# Create a python object with the EM dataset
# The file is opened with mode "a" so that dimensions and variables can be added to it below
d = netCDF4.Dataset(nc_out_path, "a")

In [7]:
# Now we want to add the probability map data

# Open a csv that maps the fiducials to their pmap MATLAB files

df = pd.read_csv(os.path.join(root, "rjmcmc_map.csv"))

# Make sure the csv has as many rows as there are fiducials in the netCDF file
assert len(d['fiducial'][:]) == len(df)


In [8]:
# Display the mapping table (the 'matfile' column holds the path of each pmap file)
df

Unnamed: 0,uniqueid,survey,date,flight,line,fiducial,easting,northing,elevation,altimeter,nchains,nsamples,nburnin,sampletime,misfit_lowest,misfit_average,ndepthcells,geom,matfile
0,10,1304,20170813,20170813,102701,1056440.5,738906.1,8462591.0,34.94,0,32,100000,50000,17868.29,0.285999,0.644066,250,POINT (738906.1 8462591),C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\g...
1,90,1304,20170813,20170813,102701,1056480.5,739840.1,8462589.0,21.58,0,32,100000,50000,18258.90,1.178937,1.573929,250,POINT (739840.1 8462589),C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\g...
2,170,1304,20170813,20170813,102701,1056520.5,740738.5,8462576.0,34.90,0,32,100000,50000,17636.06,0.527235,1.021829,250,POINT (740738.5 8462576),C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\g...
3,250,1304,20170813,20170813,102701,1056560.5,741620.0,8462580.0,33.87,0,32,100000,50000,17393.27,0.509535,0.881683,250,POINT (741620 8462580),C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\g...
4,330,1304,20170813,20170813,102701,1056600.5,742520.0,8462588.0,37.72,0,32,100000,50000,17790.94,0.322359,0.736969,250,POINT (742520 8462588),C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\g...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2304,44150,1304,20170823,20170823,109204,1916469.0,861174.3,8393667.0,122.55,0,32,100000,50000,15940.47,1.967953,7.677015,250,POINT (861174.3 8393667),C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\g...
2305,44230,1304,20170823,20170823,109204,1916509.0,862098.9,8393678.0,125.80,0,32,100000,50000,15834.07,1.369421,33.787940,250,POINT (862098.9 8393678),C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\g...
2306,44310,1304,20170823,20170823,109204,1916549.0,862945.7,8393666.0,128.09,0,32,100000,50000,12509.67,3.325986,319.501700,250,POINT (862945.7 8393666),C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\g...
2307,44390,1304,20170823,20170823,109204,1916589.0,863822.0,8393673.0,127.79,0,32,100000,50000,15545.04,0.488101,247.309000,250,POINT (863822 8393673),C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\g...


In [10]:
# Number of depth cells, read from the first row of the mapping table.
# The misspelt name `ndpethcells` is kept because it is used again when the arrays are allocated.
# NOTE(review): the column key carries a trailing space - confirm it matches the csv header.
ndpethcells = df['ndepthcells '].iloc[0]

# Number of rows in the mapping table (not the 'nsamples' column of that table)
nsamples = len(df)

ncond_cells = 100

# Number of chains, taken from the first value of the netCDF variable.
# It is converted with int() rather than read through `.data`: `.data` only
# gives a number when netCDF4 returns a masked array, whereas on a plain
# ndarray or numpy scalar it is a raw memory buffer.
nchains = int(d['nchains'][0])

# From the matlab files
nmisfit = 1019
min_layers = 1
max_layers = 25
condmin = 0.002
condmax = 3.


# TODO add an assertion function here


In [11]:
# Get a test: run the extraction on the pmap file listed in the first row of the table
D_test = extract_pmap_info(loadmat(df['matfile'].iloc[0]))

In [12]:
# Inspect the shape of the conductivity pmap
# (presumably depth cells x conductivity cells - TODO confirm)
D_test['conductivity_pmap'].shape

(250, 100)

In [13]:
# Now we create some arrays that will be added to the netCDF file as variables

# Coordinate values: chain numbers 1..nchains and layer counts min_layers..max_layers
misfit_count = np.arange(1, nchains + 1)
layer_bins = np.arange(min_layers, max_layers + 1)

# Conductivity cell coordinates, evenly spaced in log10 between condmin and condmax
cond_cells = np.linspace(np.log10(condmin), np.log10(condmax), ncond_cells)

# Zero-filled arrays whose first axis has one entry per row of the mapping table
pmap_conductivities = np.zeros(shape=(nsamples, ndpethcells, ncond_cells), dtype=np.int32)
maxlayer_counts = np.zeros(shape=(nsamples, layer_bins.shape[0]), dtype=np.int32)
misfit = np.zeros(shape=(nsamples, nchains, nmisfit), dtype=np.float64)
sample_no = np.zeros(shape=(nsamples, nchains, nmisfit), dtype=np.int32)

# `np.int` was only an alias of the builtin int and has been removed from NumPy,
# so the builtin is used directly; the resulting dtype is the same
ndata = np.zeros(shape=(nsamples), dtype=int)

In [14]:
# Now we need to populate the arrays

# We iterate through the fiducials to ensure we have everything ordered correctly

for i in range(nsamples):
    # Look up the pmap file whose fiducial equals the i-th fiducial in the netCDF file.
    # NOTE(review): this is an exact floating point comparison between the csv value and
    # the netCDF value, and .values[0] raises IndexError when no row matches - confirm
    # that the two sources always hold identical fiducials.
    pmap_file = df[df['fiducial '] == d['fiducial'][i]]['matfile'].values[0]
    D = extract_pmap_info(loadmat(pmap_file))
    # Write data to arrays
    pmap_conductivities[i] = D['conductivity_pmap']
    maxlayer_counts[i] = D['nlayer_prob']
    misfit[i] = D['misfit']
    # NOTE(review): unlike the other lines this rebinds sample_no to the current file's
    # array instead of filling sample_no[i], so after the loop only the last file's sample
    # numbers remain. The following cells index sample_no as a 2D array, so confirm the
    # intent before changing this.
    sample_no = D['sample_no']
    ndata[i] = D['ndata']
    
    

In [15]:
# Every column of sample_no must hold a single repeated value, i.e. all rows agree
for j in range(sample_no.shape[1]):
    assert np.unique(sample_no[:, j]).size == 1

# The rows are identical, so the first one is used as the sample numbers
rj_number = sample_no[0]

In [16]:
# Create new dimensions, sized from the coordinate arrays
d.createDimension("conductivity_cells", len(cond_cells))
d.createDimension("nlayers_cells", len(layer_bins))

# Create variables for the associated values for these dimensions
# (each variable has the same name as the dimension it describes)
cond_pmap_dim = d.createVariable("conductivity_cells", "f8", ("conductivity_cells",))
layer_pmap_dim = d.createVariable("nlayers_cells", "i4", ("nlayers_cells",))

# Fill
layer_pmap_dim[:] = layer_bins
cond_pmap_dim[:] = cond_cells

In [17]:
# Create new dimensions: one entry per chain and one per stored sample of each chain
d.createDimension("chain_dim", len(misfit_count))
d.createDimension("rj_sample_dim", misfit.shape[2])

# Create variables for the associated values for these dimensions
chain_dim = d.createVariable("chain_no", "i4", ("chain_dim",))
rjsample_dim = d.createVariable("rj_sample_number", "i4", ("rj_sample_dim",))

# Fill
rjsample_dim[:] = rj_number
chain_dim[:] = misfit_count

In [18]:
# Now add the remaining variables; all three are stored as 64 bit floats

# One value per point, layer and conductivity cell
cond_pmap = d.createVariable(
    "conductivity_bin_count",
    "f8",
    ("point", "layer", "conductivity_cells"),
)

# One value per point and layer-count cell
layer_pmap = d.createVariable(
    "nlayer_bin_count",
    "f8",
    ("point", "nlayers_cells"),
)

# One value per point, chain and stored sample
misfits = d.createVariable(
    "misfit",
    "f8",
    ("point", "chain_dim", "rj_sample_dim"),
)


In [19]:
# Store the number of data: per point when the values differ between points,
# otherwise as a single scalar variable
if np.unique(ndata).size != 1:
    ndat = d.createVariable('n_data', "i4", ('point',))
    ndat[:] = ndata
else:
    ndat = d.createVariable('n_data', "i4")
    ndat[:] = np.unique(ndata)[0]

In [20]:
# Write the populated arrays into the netCDF variables
cond_pmap[:] = pmap_conductivities

layer_pmap[:] = maxlayer_counts

misfits[:] = misfit

In [24]:
# Close the dataset that was opened for appending
d.close()

In [22]:
# Open the file again, this time read-only, to inspect what was written
d = netCDF4.Dataset(nc_out_path, "r")

In [23]:
# Display the summary of the dataset
d

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    title: Dataset read from ASEG-GDF file rjmcmc.dat
    Conventions: CF-1.6,ACDD-1.3
    featureType: trajectory
    geospatial_vertical_min: 21.58
    geospatial_vertical_max: 304.99
    geospatial_vertical_units: m
    geospatial_vertical_resolution: point
    geospatial_vertical_positive: up
    history: Converted from ASEG-GDF file C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\garjmcmctdem_2\combined\rjmcmc.dat using definitions file C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\garjmcmctdem_2\combined\rjmcmc.dfn
    date_created: 2020-02-18T14:14:23.190896
    geospatial_east_resolution: point
    geospatial_north_resolution: point
    geospatial_east_min: 706217.1875
    geospatial_east_max: 867178.5
    geospatial_east_units: m
    geospatial_north_min: 8348056.0
    geospatial_north_max: 8471025.0
    geospatial_north_units: m
    geospatial_bounds: POLYGON((822927.1250 8348056.0000, 707986.81