A notebook finds the relationship between groundwater samples (TDS) and AEM conductivity

Neil Symington
neil.symington@ga.gov.au

In [1]:
%matplotlib widget

In [2]:
import pandas as pd
import rasterio
from sqlite3 import dbapi2 as sqlite
import numpy as np
from hydrogeol_utils import SNMR_utils, AEM_utils, spatial_functions, borehole_utils, grid_utils
from shapely.geometry import Polygon, shape
import netCDF4
import math
import time
import os, glob
import gc
from geophys_utils._netcdf_line_utils import NetCDFLineUtils
from geophys_utils._netcdf_point_utils import NetCDFPointUtils
from geophys_utils import points2convex_hull
import rasterio
from rasterio import Affine
from rasterio.warp import reproject, Resampling
from scipy import interpolate, stats
import sqlalchemy as db
from sqlalchemy import create_engine, event
import matplotlib.pyplot as plt
from hydrogeol_utils.db_utils import makeCon, closeCon

In [3]:
# Function for finding the nea

def get_bore(df, borehole_id):
    """
    @param: df: pandas dataframe with borehole data
    @param: borehole_id: integer with identification number of borehole
    
    returns
    dataframe with only rows corresponding to particular bore
    """
    mask = df['borehole_id'] == borehole_id
    return df[mask]

In [4]:
# Bring the AEM data into memory

cond_path = r"C:\Users\PCUser\Desktop\AEM\LCI\HowardE_WB_MGA52.nc"
cond_dataset = netCDF4.Dataset(cond_path, 'r')

In [5]:
# Create a convex hull around the Keep River area based on line 3xxx,xxx

# Initialise an instance of the netCDF point and line utility classes
cond_line_utils = NetCDFLineUtils(cond_dataset)
cond_point_utils = NetCDFPointUtils(cond_dataset)

# Display the lines for the conductivity mode

lines = cond_line_utils.line

# Get the utm coordinates using the mask created above
utm_wkt, aem_coords = cond_point_utils.utm_coords(cond_point_utils.xycoords)


In [6]:
# Bring in the groundwater samples

infile = r"C:\Users\PCUser\Desktop\NSC_data\data\bores\HE_salinity.csv"

df_gw = pd.read_csv(infile)

df_gw['conductivity'] = np.nan

In [7]:

# Extract the AEM conductivity using nearest neighbour
distances, indices = spatial_functions.nearest_neighbours(df_gw[['Easting','Northing']],
                                                          aem_coords,
                                                          points_required = 1,# return 1 closest point
                                                          max_distance = 250.)
# Remove nulls which are >250m from a FID
mask = np.isfinite(distances)

indices = indices[mask]
df_gw = df_gw[mask]

In [8]:
# Extract variables as arrays using the index mask
conductivity_profile = cond_dataset['conductivity'][indices]

depth_tops = cond_dataset['layer_top_depth'][indices]

depth_bottom = np.nan*np.ones(depth_tops.shape, dtype = np.float32)

depth_bottom[:,:-1] = depth_tops[:,1:]

east = cond_dataset['easting'][indices]
north = cond_dataset['northing'][indices]
doi = cond_dataset['depth_of_investigation'][indices]

In [9]:
# Create an AEM dataframe

df_AEM = pd.DataFrame(columns = ['Depth_from', 'Depth_to',
                                 'log_conductivity', 'easting',
                                 'northing', 'borehole_id',
                                 'doi'])

df_AEM['log_conductivity'] = np.log10(conductivity_profile.flatten())
df_AEM['Depth_from'] = depth_tops.flatten()
df_AEM['Depth_to'] = depth_bottom.flatten()

df_AEM['easting'] = np.repeat(east, conductivity_profile.shape[1])
df_AEM['northing'] = np.repeat(north, conductivity_profile.shape[1])

df_AEM['doi'] = np.repeat(doi, conductivity_profile.shape[1])

df_AEM['borehole_id'] = np.repeat(df_gw['borehole_id'].values,
                                   conductivity_profile.shape[1])

In [10]:
# Mask below the doi

mask = df_AEM['doi'] > df_AEM['Depth_from']

df_AEM = df_AEM[mask]

In [11]:
df_gw

Unnamed: 0,borehole_id,Easting,Northing,elevation,Depth_from,Depth_to,EC_(S/m),TDS (mg/l),conductivity
0,RN021012,730704.956,8617357.034,30.11441,35.0,40.0,0.0402,341.93849,
1,RN021398,731319.997,8619159.994,29.9,55.8,62.0,0.0292,276.170464,
2,RN021760,728455.948,8619158.002,18.66,40.5,46.5,0.0374,300.6545,
3,RN025941,732518.999,8619168.994,32.38,76.0,82.0,0.0328,305.899496,
4,RN035865,722976.996,8623636.994,9.00648,49.0,65.0,0.0422,361.983264,
5,RN036538,739083.006,8621835.994,2.33975,79.0,85.0,6.07,51533.01123,
6,RN037154,737529.964,8618857.946,18.33939,42.0,59.0,0.0372,301.49134,
7,RN037414,737791.994,8620381.994,13.9,73.0,76.0,0.0605,481.042408,
8,RN037492,725371.004,8623695.994,5.5,53.0,58.0,2.96,21621.88707,
9,RN037493,726137.001,8622208.995,8.23,60.0,65.0,0.1469,916.547432,


In [12]:


for index, row in df_gw.iterrows():
    
    # Subset the interpreted dataframe
    borehole_id = row.borehole_id
   
    # Get the aem model for this bore
    df_temp = df_AEM[df_AEM['borehole_id'] == borehole_id]
    
    # Subset based on the screenen interval and average
    
    mask = np.logical_or((df_temp['Depth_from'] > row.Depth_to),(df_temp['Depth_to'] < row.Depth_from))
    
    print(mask.sum())
    
    print(df_temp[~mask])
    

    interval_cond = df_temp[~mask]['log_conductivity'].mean()
    
    # Add this to the df_merged dataframe
    df_gw.at[index, 'conductivity'] = 10**interval_cond
    
# Remove any nulls where the screen didn't intersect the layered model
df_gw.dropna(how="any", subset = ['conductivity'], inplace = True)

25
   Depth_from   Depth_to  log_conductivity     easting   northing borehole_id  \
5   29.200001  36.500000         -2.595496  730712.375  8617344.0    RN021012   
6   36.500000  44.299999         -3.192012  730712.375  8617344.0    RN021012   

          doi  
5  387.609985  
6  387.609985  
26
    Depth_from   Depth_to  log_conductivity     easting   northing  \
38   52.700001  61.799999         -2.957224  731273.625  8619185.0   
39   61.799999  71.599998         -3.102433  731273.625  8619185.0   

   borehole_id     doi  
38    RN021398  455.25  
39    RN021398  455.25  
20
    Depth_from   Depth_to  log_conductivity      easting   northing  \
66   36.500000  44.299999         -2.537568  728438.3125  8619162.0   
67   44.299999  52.700001         -2.907518  728438.3125  8619162.0   

   borehole_id         doi  
66    RN021760  255.130005  
67    RN021760  255.130005  
27
     Depth_from   Depth_to  log_conductivity     easting   northing  \
100   71.599998  82.199997         -2.

In [13]:
EC = df_gw['EC_(S/m)'].values * 10000
conductivity = df_gw['conductivity'].values


array([  402.,   292.,   374.,   328.,   422., 60700.,   372.,   605.,
       29600.,  1469., 30200.,   326.,   206.])

In [44]:
# Linear regression function

from scipy import stats

slope, intercept, r_value, p_value, std_err = stats.linregress(np.log10(conductivity),
                                                               np.log10(EC))

In [45]:
EC

array([  402.,   292.,   374.,   328.,   422., 60700.,   372.,   605.,
       29600.,  1469., 30200.,   326.,   206.])

In [46]:
conductivity

array([0.00127716, 0.00093362, 0.00189434, 0.00100878, 0.00327128,
       0.47585093, 0.00945169, 0.00372162, 0.01920123, 0.00122673,
       0.09208104, 0.00634518, 0.00372995])

In [83]:
import matplotlib.ticker as mtick

fig, ax = plt.subplots(1,1, figsize = (6,4))


ax.scatter(conductivity, EC)

        
ax.set_xlabel('Bulk conductivity sampled from AEM  (S/m)')

ax.set_ylabel('EC (µs/cm)')
ax.grid()

ax.set_xscale('log')
ax.set_yscale('log')

ax.set_xlim(0.0001,1)

x = np.log10(ax.get_xlim())
y = slope*x + intercept

ax.plot(10**x, 10**y, 'grey', alpha = 0.5, label = 'linear regression function \n R-squared = '+ str(round(r_value**2,2)))
ax.legend()

plt.savefig(r'C:\Users\PCUser\Desktop\NSC_data\reporting\HowardEast\HE_EC_AEM_conductivity_scatter.png', dpi = 300)
plt.show()

  This is separate from the ipykernel package so we can avoid doing imports until


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [73]:
x

array([-4.,  0.])

In [74]:
y

array([1.45004137, 5.13566166])

In [78]:
round(r_value**2,2)

0.71