A notebook for importing induction gamma data from csv files, resampling them to a regular interval, writing the interpolated data into a las file and adding the metadata to the las file header.

Neil Symington
neil.symington@ga.gov.au

In [1]:
import os, glob
import pandas as pd
import sys
import numpy as np
# Key package for processing las files 
import lasio
import matplotlib.pyplot as plt
import sys
from datetime import datetime
import math
from scipy import interpolate

In [50]:
# INterpolation functions
def get_regular_sampling_intervals(min_depth, max_depth, interval_spacing):
    
    newmin= math.ceil(min_depth * (1 /interval_spacing))/(1 /interval_spacing)
    newmax= math.floor(max_depth * (1 /interval_spacing))/(1 /interval_spacing)

    return np.arange(newmin, newmax + interval_spacing, interval_spacing)

def interpolate_logs(depths, data, new_depths, how = 'cubic'):

    interp = interpolate.interp1d(depths, data, kind = 'nearest',
                              bounds_error = False, fill_value = np.nan)
    return interp(new_depths)

In [19]:
# First lets concatenate all the csv files for induction and gamma into a csv

#outfile = r"C:\Users\PCUser\Desktop\NSC_data\data\inductionGamma\NSC_combined\NSC_induction_filtered.csv"
outfile = r"C:\Users\PCUser\Desktop\NSC_data\data\inductionGamma\NSC_combined\NSC_gamma_filtered.csv"

#indir = r"C:\Users\PCUser\Desktop\NSC_data\data\inductionGamma\csv_files\induction"
indir = r"C:\Users\PCUser\Desktop\NSC_data\data\inductionGamma\csv_files\gamma"

#df_filt = pd.DataFrame(columns = ['Bore_ID', 'DEPT','INDUCTION_CALIBRATED'])
df_filt = pd.DataFrame(columns = ['Bore_ID', 'DEPT','GAMMA_CALIBRATED'])

for file in glob.glob(os.path.join(indir, "*.csv")):
    df_temp = pd.read_csv(file)
    df_temp['Bore_ID'] = file.split('\\')[-1].split('_')[0]
    df_filt = df_filt.append(df_temp)

df_filt.dropna(how = 'any', inplace = True)

df_filt.to_csv(outfile, index = False)

df_filt = None

In [20]:
# Extract borehole information into a pandas dataframe
df_header = pd.read_csv(r"C:\Users\PCUser\Desktop\NSC_data\data\bores\NSC_bore_compilation.csv")

# Drop rows with no induction and gamma data

mask = df_header['induction_data'] == 1

df_header = df_header[mask]

In [21]:
# GEt the induciton and gamma data into dataframes
indfile = r"C:\Users\PCUser\Desktop\NSC_data\data\inductionGamma\NSC_combined\NSC_induction_filtered.csv"

gamfile = r"C:\Users\PCUser\Desktop\NSC_data\data\inductionGamma\NSC_combined\NSC_gamma_filtered.csv"

df_induction_data = pd.read_csv(indfile)

df_gamma_data = pd.read_csv(gamfile)

In [49]:
df_header_subset = df_header[df_header['Borename'] == 'RN038196']

In [55]:
# Now we want to build las files from scratch

outdir = r"C:\Users\PCUser\Desktop\NSC_data\data\inductionGamma\las_files"

# Dictionaries with units for each column
gamma_units = {'GAMMA_CALIBRATED':'cps' }

induction_units = {'INDUCTION_CALIBRATED': 'mS/m'}
# Iterate through each site


for index, row in df_header.iterrows():
    
    project_area = row.project
    
    # Create a las object
    las = lasio.LASFile()
    
    # Get enos and write it to the las file
    #eno = row.ENO
    borename = row.Borename
    
    #las.well.UWI = eno
    las.well.WELL = borename
    
    # Add other information
    las.well.DATE = str(datetime.today())
    las.well.COMP = "Geoscience Australia"
    las.well.CTRY = "AUS"
    
    las.well['X'] = lasio.HeaderItem(mnemonic='X', value=np.round(row.X,1),
                                     descr='eastings (m)')
    las.well['Y'] = lasio.HeaderItem(mnemonic='Y', value=np.round(row.Y,1),
                                     descr='northings (m)')
    las.well['GDAT'] = lasio.HeaderItem(mnemonic='GDAT', value='GDA94',
                                     descr='geodetic datum')
    las.well['HZCS'] = lasio.HeaderItem(mnemonic='HZCS', value='MGA zone 52',
                                     descr='Horizontal Co-ordinate System')
    
    # Add infromatoin on the datum
    las.well['PDAT'] = lasio.HeaderItem('PDAT', value= "GL", descr = "Local Permanent Datum")
    
    las.well ['LMF'] = lasio.HeaderItem('LMF', value= row['DEPTH_REFERENCE_TYPE'],
                                          descr = "Logging Measured From")
    
    las.well['APD'] = lasio.HeaderItem('APD', value= row['Ref_Datum'], unit = 'm',
                                          descr = "Elevation of Depth Reference (LMF) Above Permanent Datum")
    
    las.well['EPD'] = lasio.HeaderItem('EPD', value= row['ground_mAHD'], unit = 'mAHD',
                                          descr = "Elevation of Permanent Datum (PDAT) above Mean Sea Level")
    
    # get induction
    
    mask = df_induction_data.Bore_ID == borename
    
    # Get rows and non null columns
    df_ind_dat  = df_induction_data[mask]
    
    mask = df_gamma_data.Bore_ID == borename
    
    df_gam_dat  = df_gamma_data[mask]
    
    # Find the minimum and maximum depths
    all_depths = np.concatenate([df_ind_dat.DEPT.values, df_gam_dat.DEPT.values])
    
    min_depth, max_depth = np.min(all_depths), np.max(all_depths)
    
    # Now we want to resample depths to every 5 cm intervals and add to the
    # las object
    
    new_depths = get_regular_sampling_intervals(min_depth, max_depth, 0.05)
    
    new_depths = np.round(new_depths,2)
    
    
    las.add_curve('DEPT', new_depths, unit='m')
    
    # Now we interpolate the data columns
    
    # gamma columns
    if len(df_gam_dat) > 0:
        # Flag for gamma
        gamma_acquired = True
        
        # Iterate
        for item in gamma_units.keys():
    
            data = df_gam_dat[item].dropna()
        
           # Get the non interpolated depths
            depths = df_gam_dat.loc[data.index,'DEPT'].values
            
            # If the columns is not null interpolate
            if not pd.isnull(data).all():
            
                new_data = interpolate_logs(depths, data.values,
                                            new_depths, how = 'linear')
                # Add the curve to the las file
                las.add_curve(item, new_data, unit=gamma_units[item])
    else:
        # Flag for gamma
        gamma_acquired = False
        
                
    if len(df_ind_dat) > 0:
        # Flag for induction
        induction_acquired = True
        
        # Iterate
        for item in induction_units.keys():
    
            data = df_ind_dat[item].dropna()
        
            # Get the non interpolated depths
            depths = df_ind_dat.loc[data.index,'DEPT'].values
            
            # If the columns is not null interpolate
            if not pd.isnull(data).all():
            
                new_data = interpolate_logs(depths, data.values,
                                            new_depths, how = 'linear')
                # Add the curve to the las file
                las.add_curve(item, new_data, unit=induction_units[item])
    else:
        # Flag for gamma
        induction_acquired = False
        
    # Add some metadata
    metadata = "Filtered "

    outfile = borename
    
    if induction_acquired:
        outfile += '_induction'
        metadata += ' induction'
    if gamma_acquired:
        outfile += '_gamma'
        metadata += ' gamma'
        
    metadata += " logs for the "
    metadata += project_area
    metadata += " EFtF project."
    outfile += '.LAS'
    
    las.other = metadata
    
    las.write(os.path.join(outdir, outfile), version=2.0)

    

ValueError: zero-size array to reduction operation minimum which has no identity

In [None]:
row

In [56]:
# Now plot some of these logs to check the reasonableness of the resampling

def plot_induction_gamma_logs(lasfile, induction_column, gamma_column):
    las = lasio.read(lasfile)
    df_logs = las.df()
    y = df_logs.index.values
    x1 = df_logs[induction_column].values
    x2 = df_logs[gamma_column].values
    
    fig, (ax1, ax2) = plt.subplots(1,2,sharey = True,
                                  figsize = (8,8))
    ax1.plot(x1, y)
    ax1.set_xscale('log')
    ax2.plot(x2, y)
    ax1.invert_yaxis()
    ax2.invert_yaxis()
    lasplotdir = r"C:\Users\PCUser\Desktop\NSC_data\data\inductionGamma\las_plots"
    fname = file.split('\\')[-1].split('_')[0] + "_las_plots.png"
    plt.savefig(os.path.join(lasplotdir, fname),
               dpi = 300)
    plt.close()

def plot_ind_vs_filtered(lasfile, induction_column):
    las = lasio.read(lasfile)
    bore_name = file.split('\\')[-1].split('_')[0]
    df_logs = las.df()
    y1 = df_logs.index.values
    x1 = df_logs[induction_column].values
    # Get filtered data
    mask= df_induction_data['Bore_ID'] == bore_name
    df_temp = df_induction_data[mask]
    
    y2 = df_temp['DEPT'].values
    x2 = df_temp[induction_column].values
    
    fig, (ax1, ax2) = plt.subplots(1,2,sharey = True,
                                   sharex = True,
                                  figsize = (8,8))
    ax1.plot(x1, y1)
    ax1.set_xscale('log')
    ax2.plot(x2, y2)
    ax1.invert_yaxis()
    ax2.invert_yaxis()
    ax1.grid(True)
    ax2.grid(True)
    lasplotdir = r"C:\Users\PCUser\Desktop\NSC_data\data\inductionGamma\las_plots"
    fname = file.split('\\')[-1].split('_')[0] + "_induction_plots.png"
    plt.savefig(os.path.join(lasplotdir, fname),
               dpi = 300)
    plt.close()
    
def plot_gam_vs_filtered(lasfile, gamma_column):
    las = lasio.read(lasfile)
    bore_name = file.split('\\')[-1].split('_')[0]
    df_logs = las.df()
    y1 = df_logs.index.values
    x1 = df_logs[gamma_column].values
    # Get filtered data
    mask= df_gamma_data['Bore_ID'] == bore_name
    df_temp = df_gamma_data[mask]
    
    y2 = df_temp['DEPT'].values
    x2 = df_temp[gamma_column].values
    
    fig, (ax1, ax2) = plt.subplots(1,2,sharey = True,
                                   sharex = True,
                                  figsize = (8,8))
    ax1.plot(x1, y1)
    ax2.plot(x2, y2)
    ax1.invert_yaxis()
    ax2.invert_yaxis()
    ax1.grid(True)
    ax2.grid(True)
    lasplotdir = r"C:\Users\PCUser\Desktop\NSC_data\data\inductionGamma\las_plots"
    fname = file.split('\\')[-1].split('_')[0] + "_gamma_plots.png"
    plt.savefig(os.path.join(lasplotdir, fname),
               dpi = 300)
    plt.close()


In [None]:
infile = r"C:\Users\PCUser\Desktop\NSC_data\data\inductionGamma\las_files\RN036538_induction_gamma.LAS"
plot_induction_gamma_logs(infile, 'INDUCTION_CALIBRATED','GAMMA_CALIBRATED')

In [57]:
os.chdir(outdir)

for file in glob.glob('*.las'):
    #plot_induction_gamma_logs(file, 'INDUCTION_CALIBRATED','GAMMA_CALIBRATED')
    plot_ind_vs_filtered(file, 'INDUCTION_CALIBRATED')
    plot_gam_vs_filtered(file, 'GAMMA_CALIBRATED')

In [43]:
df_induction_data['Bore_ID']

Unnamed: 0,Bore_ID,DEPT,INDUCTION_CALIBRATED
0,RN007509,54.675,23.280093
1,RN007509,54.685,22.712323
2,RN007509,54.695,22.170455
3,RN007509,54.705,21.698332
4,RN007509,54.715,21.273953
5,RN007509,54.725,20.907202
6,RN007509,54.735,20.578915
7,RN007509,54.745,20.280951
8,RN007509,54.755,20.012153
9,RN007509,54.765,19.776620
