This notebook produces inversion-ready AEM data files (for use with Ross C. Brodie's inversion codes) from a netCDF4 data file and recalculated noise.

In [1]:
import netCDF4
import numpy as np
import os
from sqlite3 import dbapi2 as sqlite
import sqlalchemy as db
from sqlalchemy import create_engine, event
from geophys_utils import NetCDFPointUtils
from hydrogeol_utils import spatial_functions
import pandas as pd

In [2]:
# Location of the netCDF file that holds the AEM data
nc_inpath = r"C:\Users\PCUser\Desktop\AEM\EM\AUS_10021_DalyR_EM.nc"

# Open the dataset read-only; `d` is the handle that the later cells read from
d = netCDF4.Dataset(nc_inpath, 'r')

In [3]:
# Quick look at the largest value held in the 'easting' variable
np.max(d['easting'])

910283.0

In [4]:
# Load the coordinates of the sites for which the nearest AEM neighbour will be inverted

# Extract borehole data from the database

DB_PATH = r"C:\Users\PCUser\Desktop\EK_data\Boreholes\East_Kimberley_borehole_data.sqlite"

engine = db.create_engine('sqlite:///' + DB_PATH, module=sqlite)

# Read the borehole coordinates into a pandas dataframe. The connection is used
# as a context manager so that it is closed as soon as the query has run.

with engine.connect() as connection:
    df_header = pd.read_sql('select Easting, Northing from borehole', connection)

# Release any connections still held by the engine before moving to the next database
engine.dispose()

# Now do the same for SNMR sites

DB_PATH = r"C:\Users\PCUser\Desktop\EK_data\SNMR\East_Kimberley_SNMR.sqlite"

engine = db.create_engine('sqlite:///' + DB_PATH, module=sqlite)

# Read the SNMR site coordinates into a pandas dataframe

with engine.connect() as connection:
    df_SNMR = pd.read_sql('select mid_X, mid_Y from sites', connection)

engine.dispose()

In [5]:
# Stack the borehole and SNMR coordinates into one (n_sites, 2) float array:
# borehole rows first, SNMR rows after them

n_boreholes = len(df_header)
n_sites = n_boreholes + len(df_SNMR)

coords = np.zeros((n_sites, 2), dtype=np.float64)
coords[:n_boreholes, :] = df_header.values
coords[n_boreholes:, :] = df_SNMR.values

# Same coordinates as a dataframe with named columns
df_coords = pd.DataFrame(data=coords, columns=['Easting', 'Northing'])

In [4]:
# Wrap the open dataset in the point utilities helper
cond_point_utils = NetCDFPointUtils(d)

# Get the AEM utm coordinates as two columns: easting first, northing second
aem_easting = d['easting'][:]
aem_northing = d['northing']

aem_coords = np.column_stack((aem_easting, aem_northing))

In [7]:
# Find, for every site coordinate, its nearest AEM point
# NOTE(review): max_distance is presumably in the same units as the coordinates (metres) - confirm
distances, indices = spatial_functions.nearest_neighbours(
    coords,
    aem_coords,
    points_required=1,  # only the single closest point is requested
    max_distance=250.,
)

In [17]:
# Keep the unique AEM indices whose neighbour distance is finite
# (presumably a non-finite distance marks "no point within max_distance" - confirm)
has_neighbour = np.isfinite(distances)
masked_inds = np.unique(indices[has_neighbour])

In [5]:
# Read the whole 'line' variable and keep its underlying data array
line_variable = d['line']
lines = line_variable[:].data

In [6]:
# Display the line numbers read from the dataset
lines

array([100001, 100101, 100201, 100301, 100401, 100501, 100601, 100701,
       100801, 100901, 101001, 101101, 101201, 101301, 101401, 101501,
       101502, 101601, 101701, 101801, 101901, 102001, 102101, 102201,
       102301, 102401, 102501, 102601, 102701, 102801, 102901, 103001,
       103101, 103201, 103301, 103401, 103501, 103502, 103601, 103701,
       103801, 103901, 104001, 104101, 104102, 104201, 104301, 104302,
       104401, 104501, 104601, 104701, 104801, 104901, 105001, 105101,
       105201, 105301, 105401, 105501, 105601, 105602, 105603, 105701,
       105801, 105901, 105902, 106001, 106101, 106201, 106301, 106302,
       106401, 106501, 106601, 106701, 106801, 106901, 107001, 107101,
       107201, 107301, 107401, 107501, 107601, 107701, 107801, 107901,
       108001, 108101, 108201, 108301, 108401, 108501, 108601, 108701,
       108801, 108901, 109001, 109101, 109201, 109202, 109203, 109204,
       109301, 109302, 109303, 109401, 109402, 109501, 109502, 109503,
      

In [16]:
# Select the lines whose number lies strictly between 100,000 and 400,000

Keep_lines = [x for x in lines if 100000. < x < 400000.]

# Boolean mask from the point utilities for the selected lines
Keep_mask = cond_point_utils.get_lookup_mask(Keep_lines)

# Positions at which the mask is set
Keep_inds = np.where(Keep_mask)[0]

# Random sub-sampling (shuffle, then take the first 10,000) is currently
# switched off, so every selected index is kept
masked_inds = Keep_inds


NameError: name 'lines' is not defined

In [9]:
# Length of the first axis of the 'northing' variable
d['northing'].shape[0]

167007

In [5]:
# Read every variable that goes into the .dat file (whose formatting is defined by a dfn)

cols = [
    "ga_project", "utc_date", "flight", "line", "fiducial",
    "easting", "northing",
    "tx_height_measured", "elevation", "gps_height",
    "roll", "pitch", "yaw",
    "TxRx_dx", "TxRx_dy", "TxRx_dz",
    "low_moment_Z-component_EM_data", "high_moment_Z-component_EM_data",
    "lm_z_noise", "hm_z_noise",
]

# Variable name -> data array. Sub-setting by masked_inds is currently switched
# off, so every variable is read in full.
inv_read = {}

for item in cols:
    variable = d[item]
    n_dims = len(variable.shape)

    if n_dims == 1 and item == 'line':
        # 'line' is indexed with the values of 'line_index'
        line_inds = d['line_index'][:]
        inv_read[item] = variable[line_inds].data
    elif n_dims == 1 and item == 'flight':
        # 'flight' is indexed with the values of 'flight_index'
        flight_inds = d['flight_index'][:]
        inv_read[item] = variable[flight_inds].data
    elif n_dims <= 2:
        # Scalars, remaining vectors and 2D arrays are read whole;
        # anything with more dimensions is skipped
        inv_read[item] = variable[:].data
            

In [19]:
#inv_read['lm_z_noise'] = 0.5*inv_read['lm_z_noise']
#inv_read['hm_z_noise'] = 0.5*inv_read['hm_z_noise']


In [6]:
# Empty frame with one row per entry of 'northing' (no sub-setting by masked_inds)
n_points = d['northing'].shape[0]
df = pd.DataFrame(index=range(n_points))

In [7]:
# Copy the arrays into the frame: anything below 2 dimensions becomes a single
# column, while each column of a 2D array gets its own column named
# <item>_<1-based column number>
for item, values in inv_read.items():
    if len(values.shape) >= 2:
        for col_number in range(1, values.shape[1] + 1):
            df['{}_{}'.format(item, col_number)] = values[:, col_number - 1]
    else:
        df[item] = values

In [8]:
# Remove any entries with high altitude lines (only line numbers below 913000 are kept).
# An explicit copy is taken so that later column assignments act on an
# independent frame rather than on a slice of the unfiltered one, which would
# otherwise trigger pandas' SettingWithCopyWarning.

df = df[df['line'] < 913000].copy()

In [9]:
# Replace every column with fixed-width formatted strings.
#
# Whole-column assignment (df[col] = ...) is used here. `.at` addresses a single
# row/column cell, so passing it a slice together with a list of values is
# rejected by current pandas versions.

# ga_project is written as an integer, so it is cast before formatting
df['ga_project'] = ['{:5d}'.format(x) for x in df['ga_project'].values.astype(int)]

# Format specification for each of the remaining header columns
header_formats = {
    'utc_date': '{:9.0F}',
    'flight': '{:12.2F}',
    'line': '{:8.0F}',
    'fiducial': '{:12.2F}',
    'easting': '{:10.2F}',
    'northing': '{:11.2F}',
    'tx_height_measured': '{:8.1F}',
    'elevation': '{:9.2F}',
    'gps_height': '{:9.2F}',
    'roll': '{:7.2F}',
    'pitch': '{:7.2F}',
    'yaw': '{:7.2F}',
    'TxRx_dx': '{:7.2F}',
    'TxRx_dy': '{:7.2F}',
    'TxRx_dz': '{:7.2F}',
}

for column, fmt in header_formats.items():
    df[column] = [fmt.format(x) for x in df[column].values]


# Every column after the 16 header columns is written in exponent notation

for item in df.columns[16:]:
    df[item] = ['{:15.6E}'.format(x) for x in df[item].values]

In [10]:
# Write the formatted frame to an intermediate file
outfile = r"C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\2017_DalyRiver_SkyTEM\inversion_ready\DR_temp.dat"

# A pipe is used as the separator because it is easy to strip out afterwards
df.to_csv(outfile, sep='|', header=False, index=False)

# Read the intermediate file back and remove every pipe

with open(outfile, 'r') as inf:
    s = inf.read()

new_s = ''.join(s.split('|'))


# Drop the final newline, if there is one
if new_s.endswith('\n'):
    new_s = new_s[:-1]

new_outfile = r"C:\Users\PCUser\Desktop\NSC_data\data\AEM\DR\2017_DalyRiver_SkyTEM\inversion_ready\DR_inversion_ready.dat"

# Write the pipe-free text to the final file
with open(new_outfile, 'w') as f:
    f.write(new_s)



In [102]:
# Write only the first record of the formatted frame
outfile = r"C:\Users\PCUser\Desktop\EK_data\AEM\inversion_ready_data\OrdKeep_inversion_ready_subset_temp.dat"

# Note use a pipe so we can easily delete later.
# df.iloc[[0]] (list indexer) keeps the record as a one-row DataFrame, so it is
# written as a single line. df.iloc[0] would give a Series, which to_csv writes
# as one value per line.
df.iloc[[0]].to_csv(outfile, sep = '|', index = False, header = False)

# Now open the file and delete the pipe

with open(outfile, 'r') as inf:
    s = inf.read()

new_s = s.replace('|','')


# Remove the final newline
if new_s[-1:] == '\n':
    new_s = new_s[:-1]

new_outfile = r"C:\Users\PCUser\Desktop\EK_data\AEM\inversion_ready_data\OrdKeep_inversion_ready_onefid.dat"

with open(new_outfile, 'w') as f:
    f.write(new_s)
