This notebook takes borehole data from the East Kimberley and interpolates/averages them so that they can be formulated as a sample vs feature array. This array can be used to investigate correlations between datasets, to impute missing values in the array, and as training data for interpretations at boreholes.

Neil Symington
neil.symington@ga.gov.au

In [1]:
%matplotlib inline

In [2]:
from sqlite3 import dbapi2 as sqlite
from shapely.geometry import Polygon
import pandas as pd
import geopandas as gpd
import numpy as np
from hydrogeol_utils import spatial_functions, AEM_utils, plotting_utils
from geophys_utils._netcdf_point_utils import NetCDFPointUtils
import matplotlib.pyplot as plt
import os
import sqlalchemy as db
from sqlalchemy import create_engine, event
import netCDF4

  from ._conv import register_converters as _register_converters


In [3]:
# Read the project-area polygons from the Keep River shapefile into a GeoDataFrame

gdf = gpd.read_file(r"\\prod.lan\active\proj\futurex\East_Kimberley\Working\Neil\Keep_spatial\Keep_River_polygons.shp")

# Optional: visualise the palaeovalley extent as defined by the magnetics
#gdf.loc[[0],'geometry'].plot()

# Merge all of the polygons into a single geometry and keep its WKT text so
# that it can be embedded in the spatial SQL query used to select the bores
OrdKeep = gdf.geometry.unary_union.wkt


In [4]:
# Open a connection to the borehole SpatiaLite database

# Directory holding the compiled borehole database
path = r"\\prod.lan\active\proj\futurex\East_Kimberley\Working\SharedWorkspace\Bores_working\compilation\spatialite"

DB_PATH = os.path.join(path, "East_Kimberley_Boreholes.sqlite")

# Local folder containing the mod_spatialite binaries (Windows-specific location)
SPATIALITE_PATH = r'C:\mod_spatialite-4.3.0a-win-amd64'

# Add spatialite dll to path
# NOTE(review): this prepends the folder again every time the cell is re-run
os.environ['PATH'] = SPATIALITE_PATH + ';' + os.environ['PATH']

# Create the engine using the sqlite3 dbapi2 module imported above as the driver
engine = db.create_engine('sqlite:///' + DB_PATH, module=sqlite)

@event.listens_for(engine, 'connect')
def connect(dbapi_connection, connection_rec):
    """Load the mod_spatialite extension on each new DBAPI connection.

    Registered as a listener for the engine's 'connect' event, so the
    spatial SQL functions are available on every connection it opens.
    """
    # Extension loading has to be switched on before load_extension will run
    # NOTE(review): extension loading is left enabled afterwards - confirm that is intended
    dbapi_connection.enable_load_extension(True)
    dbapi_connection.execute('SELECT load_extension("mod_spatialite")')


# Connection shared by the queries in the following cells
connection = engine.connect()


In [5]:
# Import the borehole header table using a spatial sql query. The WKT of the
# project-area polygon (OrdKeep) is substituted into the query text.
# NOTE(review): the polygon is passed as the first argument of within() and the
# bore geometry as the second - confirm that this argument order selects the
# bores lying inside the polygon as intended.

header_query = """

select

   b.*

from

   borehole b

where

    within(GeomFromText('{}'), b.geometry);
    

""".format(OrdKeep)

# Echo the final query (this includes the full polygon WKT) for inspection
print(header_query)

# One row per selected borehole
df_header = pd.read_sql(header_query, connection, params = None)



select

   b.*

from

   borehole b

where

    within(GeomFromText('POLYGON ((503725.1616292503 8365005.710026997, 503797.9112354947 8365049.777662147, 545533.9824877982 8363955.027693332, 555824.2588038964 8337896.865118176, 558946.0197683489 8337454.919065265, 557778.2797407188 8337239.809060173, 556205.879152536 8336930.4843543, 557487.5911249644 8333684.793186628, 557496.2158347503 8333662.952753694, 544392.6445761472 8283029.217971241, 544357.8626615324 8282894.816577523, 503524.2702587695 8237140.634983661, 448598.5557859408 8240759.81529371, 448510.4090386229 8240765.623483469, 473911.8997739411 8299766.09539283, 473902.8962494675 8299786.129389727, 457250.0834351546 8336857.594840728, 503725.1616292503 8365005.710026997))'), b.geometry);
    




In [6]:
# Collect the ids of the selected boreholes; these are used to filter the
# downhole data tables when they are loaded

enos = df_header.borehole_id.values


In [7]:
# Now load the various datasets

def extract_sql_with_enos(table_name, columns, connection, enos):
    """Read selected columns of a table for a set of boreholes.

    The borehole ids are written directly into the ``in (...)`` clause of the
    query text (they are not bound as query parameters).

    Parameters
    ----------
    table_name : str
        Name of the table to query.
    columns : sequence of str
        Column names to select from the table.
    connection : database connection accepted by ``pandas.read_sql``
        Open connection to the database.
    enos : iterable
        Borehole ids; only rows whose ``borehole_id`` is one of these are returned.

    Returns
    -------
    pandas.DataFrame
        The query result, one column per entry in ``columns``.
    """
    # Comma separated list of ids for the "in" clause
    id_list = ','.join(map(str, enos))

    # Every selected column is qualified with the table alias "t"
    select_clause = "t." + ", t.".join(columns)

    sql = "select {} from {} t where t.borehole_id in ({});".format(
        select_clause, table_name, id_list)

    return pd.read_sql(sql, connection)

In [8]:
# Columns to read from each downhole data table. borehole_id is always
# included so that the rows can be tied back to the header table.
bNMR_columns = ['Depth', 'Total_water_content', 'Clay_water_content',
                'Capillary_water_content', 'Free_water_content', 'K_SDR',
                'borehole_id']

lithology_columns = ['Depth_from', 'Depth_to', 'Lithology_name',
                     'Lithology_description', 'borehole_id']

indgam_columns = ['Depth', 'Conductivity', 'Gamma_calibrated', 'GR',
                  'borehole_id']

EC_pH_columns = ['Depth', 'EC', 'pH', 'borehole_id']

# Borehole NMR data
df_bNMR = extract_sql_with_enos("boreholeNMR_data", bNMR_columns, connection, enos)

# Lithology intervals
df_lithology = extract_sql_with_enos("borehole_lithology", lithology_columns, connection, enos)

# Induction conductivity and gamma logs
df_indgam = extract_sql_with_enos("induction_gamma_data", indgam_columns, connection, enos)

# Pore fluid EC and pH
df_EC_pH = extract_sql_with_enos("pore_fluid_EC_pH", EC_pH_columns, connection, enos)


In [9]:
# The first investigation looks at relationships between the borehole data
# loaded above and the AEM, so the AEM conductivity model is opened here.

nc_dir = r"\\prod.lan\active\proj\futurex\East_Kimberley\Data\Processed\Geophysics\AEM\EK_nbc_inversions\OrdKeep_borehole_constrained\netcdf"

nc_path = os.path.join(nc_dir, "OrdKeep2019_ModeExp_cor2DLogOrd.nc")

# Open the AEM conductivity netCDF file
ek_cond = netCDF4.Dataset(nc_path)

# Point utilities wrapping the open dataset
cond_point_util = NetCDFPointUtils(ek_cond)

# Convert the dataset's xy coordinates with utm_coords; it returns a pair that
# is unpacked into a wkt value and the AEM point coordinates
aem_xy = cond_point_util.xycoords

wkt, aem_coords = cond_point_util.utm_coords(aem_xy)

In [10]:
# For every borehole, find the distances to, and the netcdf indices of, the
# 10 nearest AEM points, searching no further than max_distance
# (1000, i.e. 1 km assuming the coordinates are in metres - TODO confirm)

distances, indices = spatial_functions.nearest_neighbours(df_header[['Easting','Northing']], aem_coords,
                                                        points_required = 10, max_distance = 1000.)

In [11]:
# Build a table of AEM conductivity profiles, one profile per borehole that
# has AEM coverage. To guard against anomalous points, each profile is
# derived from the 10 closest AEM points found in the previous cell
# (the inverse distance weighting is presumably applied inside
# extract_conductivity_profile - TODO confirm).

# Columns the combined table is expected to carry. A list is used because a
# set has no defined order and is not accepted as columns by newer pandas.
conductivity_columns = ['borehole_id', 'Depth_from', 'Depth_to', 'conductivity']

# Profiles are gathered in a list and concatenated once after the loop.
# Growing a dataframe with DataFrame.append inside the loop copies the whole
# table on every iteration, raises a sort FutureWarning, and the method no
# longer exists in pandas 2.x.
conductivity_profiles = []

# Create a flag for if there is AEM data for the borehole

df_header['AEM_proximal'] = 0

for i, (index, row) in enumerate(df_header.iterrows()):
    # Only use bores whose neighbour distances sum to a finite number; a
    # non-finite sum marks a bore that is further than the maximum search
    # distance from the AEM
    if np.isfinite(np.sum(distances[i])):

        # Extract a representative profile
        df_conductivity_profile = AEM_utils.extract_conductivity_profile(ek_cond,
                                                   distances[i], indices[i],
                                                   as_dataframe = True,
                                                  mask_below_doi=True)

        # Tag every row of the profile with the bore it belongs to
        df_conductivity_profile['borehole_id'] = row.borehole_id

        conductivity_profiles.append(df_conductivity_profile)

        # Update the flag
        df_header.at[index, 'AEM_proximal'] = 1

if conductivity_profiles:
    df_conductivity = pd.concat(conductivity_profiles, sort=False)

    # Keep the alphabetical column layout that the append-based version of
    # this cell produced, and make sure every expected column is present
    ordered_columns = sorted(set(conductivity_columns).union(df_conductivity.columns))
    df_conductivity = df_conductivity.reindex(columns=ordered_columns)
else:
    # No bore is close enough to the AEM: return an empty table with the
    # expected columns so that later cells can still filter on them
    df_conductivity = pd.DataFrame(columns=sorted(conductivity_columns))


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  sort=sort)


In [20]:
# Write the table of AEM conductivity profiles out as a csv (the dataframe
# index is not written)

outfile = r"\\prod.lan\active\proj\futurex\East_Kimberley\Working\SharedWorkspace\Bores_working\compilation\AEM\borhole_interpolated_AEM_profiles.csv"

df_conductivity.to_csv(outfile, index=False)

In [18]:
# Quick visual comparison of the borehole induction logs against the AEM
# model. One two-panel figure (AEM on the left, induction on the right) is
# saved for every bore that has both datasets.

outdir = r"\\prod.lan\active\proj\futurex\East_Kimberley\Working\Neil\AEM_vs_induction_plots"

for index, row in df_header.iterrows():

    # Only bores flagged as having both an AEM profile and an induction log
    if not (row['AEM_proximal'] == 1 and row['Induction_acquired'] == 1):
        continue

    # Extract the data using eno
    eno = row['borehole_id']

    # Extract induction (filter the table once and reuse the result)
    bore_induction = df_indgam[df_indgam['borehole_id'] == eno]

    induction_profile = bore_induction['Conductivity'].values

    depth = bore_induction['Depth'].values

    # np.max below raises on an empty array, so a bore that is flagged as
    # logged but has no induction samples is reported and skipped
    if depth.size == 0:
        print("No induction samples found for {}; skipping".format(row['Borehole_name']))
        continue

    # Extract AEM conductivity
    bore_aem = df_conductivity[df_conductivity['borehole_id'] == eno]

    conductivity_profile = bore_aem['conductivity'].values

    depth_top = bore_aem['Depth_from'].values

    # So the plots are on a similar scale we clip the conductivity to
    # the depth of the bore + 20 m
    depth_mask = depth_top < (np.max(depth) + 20.)

    # The figure is only created once the data are known to be plottable
    fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(8, 6))

    ax2 = plotting_utils.plot_downhole_log(ax2, induction_profile,
                                           depth, log_plot=True)

    ax1 = plotting_utils.plot_AEM_conductivity_profile(ax1, conductivity_profile[depth_mask],
                                                       depth_top[depth_mask], doi=None, log_plot=True)

    ax1.set_title("AEM inverted model")
    ax1.set_ylabel("Depth")
    ax1.set_xlabel("AEM bulk Conductivity (S/m)")
    ax2.set_title("Borehole induction data")

    ax2.set_xlabel("borehole induction conductivity (S/m)")

    # Save and close this specific figure rather than relying on whichever
    # figure pyplot currently treats as active
    fig.savefig(os.path.join(outdir, row['Borehole_name'] + "_conductivity_vs_induction.png"))

    plt.close(fig)
        

  out[a <= 0] = -1000
