This notebook demonstrates how to filter and process induction and gamma logs. The aim is to make the data more interpretable by removing features that are not related to the hydrogeology of the bore. For example, steel bolts in the bore casing give a large induction response but have no relation to the formation.

In [1]:
%matplotlib inline

In [2]:
import numpy as np
import lasio # package for reading and writing las files
import pandas as pd
import matplotlib.pyplot as plt
import sys, os
import datetime
import math

In [12]:
# Define key functions

def plot_logs(ax, ser, title, logplot = True):
    """
    Draw one log trace on the supplied axis: the series values run along x
    and the series index runs along y.

    @param ax: matplotlib axis object to draw on
    @param ser: pandas series object; its values are plotted against its index
    @param title: title of plot
    @param logplot: boolean: if true the x axis is switched to a log scale
    """
    x_values, y_values = ser.values, ser.index
    ax.plot(x_values, y_values)

    if logplot:
        ax.set_xscale('log')

    # Solid lines for the major grid, dotted lines for the minor grid
    for which, linestyle in (('major', '-'), ('minor', ':')):
        ax.grid(which=which, linestyle=linestyle, linewidth='0.5', color='grey')

    ax.set_title(title)


def combination(n, r):
    """
    Number of ways of choosing r items out of n ("n choose r").

    @param n: non-negative integer: size of the set
    @param r: integer with 0 <= r <= n: number of items chosen
    @return: the binomial coefficient n! / (r! (n - r)!) as an exact integer

    math.factorial rejects negative arguments, so r > n raises a ValueError.
    """
    # Floor division keeps the whole calculation in exact integer arithmetic.
    # Dividing with "/" goes through a float and silently loses precision once
    # the result no longer fits in a double's 53-bit mantissa.
    return math.factorial(n) // (math.factorial(r) * math.factorial(n - r))

def pascals_triangle(rows):
    """
    Return one row of Pascal's triangle.

    @param rows: positive integer: 1-based number of the row to return
    @return: list of the `rows` binomial coefficients C(rows - 1, k) for
             k = 0 .. rows - 1, as exact Python integers

    Raises a ValueError if rows is not a whole number >= 1.
    """
    n_rows = int(rows)  # also turns numpy integers into overflow-free Python ints
    if n_rows != rows or n_rows < 1:
        raise ValueError("rows must be a positive integer, got {}".format(rows))

    # Only the requested row is needed, so build it directly with the
    # recurrence C(n, k) = C(n, k - 1) * (n - k + 1) / k, where n = n_rows - 1.
    # The product is always divisible by k, so floor division is exact.
    row = [1]
    for k in range(1, n_rows):
        row.append(row[-1] * (n_rows - k) // k)
    return row

def binom_filter(x, kernel):
    """
    Binomially weighted average of a single rolling window.

    @param x: 1D array with the observations in the window, oldest first
    @param kernel: 1D array with the binomial kernel (one weight per
                   observation of a full window)
    @return: float: the kernel-weighted average of x, or nan for an empty window

    The weights are renormalised to sum to one, so a constant signal is
    returned unchanged. Taking the mean of a 'same'-mode convolution instead
    includes the zero-padded edges of the convolution and attenuates the
    signal, e.g. [1, 1, 1] with kernel [0.25, 0.5, 0.25] would give 0.83.

    Raises a ValueError if the window holds more observations than the kernel
    has weights.
    """
    values = np.asarray(x, dtype=float)
    weights = np.asarray(kernel, dtype=float)

    if values.size == 0:
        return np.nan
    if values.size > weights.size:
        raise ValueError(
            "Window of {} observations is longer than the kernel ({} weights)".format(
                values.size, weights.size))

    # A partial window (fewer observations than a full window) is missing its
    # oldest samples, so keep the trailing weights, which line up with the
    # most recent observations, and renormalise them.
    weights = weights[weights.size - values.size:]
    return float(np.dot(values, weights) / weights.sum())
    
def run_filter(series, window, min_periods, filter):
    """
    Apply one rolling filter to a pandas series. Mostly based on
    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rolling.html
    @param series: pandas series with the data to be filtered
    @param: window: integer with filter window size
    @param: min_periods: minimum number of observations in a window needed to have a value returned
    @param: filter: 'median', 'binomial' or 'mean': define what type of filter to use

    returns:
    a pandas series with the filtered values, on the same index as the input

    Raises a ValueError if filter is not one of the supported names.
    """
    # Fail loudly on an unknown filter name; returning None here would only
    # surface later as an unrelated error in the caller.
    if filter not in ('median', 'binomial', 'mean'):
        raise ValueError("Filter must be one of  'median', 'binomial' or 'mean'")

    roller = series.rolling(window = window, min_periods = min_periods)

    if filter == 'median':
        return roller.median()

    if filter == 'mean':
        return roller.mean()

    # Binomial filter: the weights are the row of Pascal's triangle with
    # `window` entries, normalised to sum to one.
    coefficients = np.asarray(pascals_triangle(window), dtype = float)
    kernel = coefficients / coefficients.sum()
    return roller.apply(binom_filter, args = (kernel,), raw = True)

        
        
def _as_list(value, is_valid_scalar, message):
    """Return value as a list, wrapping a lone scalar accepted by is_valid_scalar."""
    if isinstance(value, (list, tuple, np.ndarray)):
        return list(value)
    if is_valid_scalar(value):
        return [value]
    raise TypeError(message)


def filter_log(infile, bore_id, cols, windows, min_periods, csv_dir, fig_dir,
               filters  = ['median'], remove_negs = True, log_space = True,
               min_depth = np.nan, max_depth = np.nan):
    """
    Function that filters a geophysical log based on the kwarg filter types and filtering
    parameters. The filters are applied one after the other: stage i uses filters[i],
    windows[i] and min_periods[i] and works on the output of stage i - 1.

    @param: infile: string: path to las file with geophysical log
    @param: bore_id: string: borehole identification, used as the output sub-directory name
    @param: cols: comma delimited string with las file columns to filter
    @param: windows: integer or list/ array of integers with filter window size, one per filter
    @param: min_periods: integer or list/ array of minimum observation to have a value returned, one per filter
    @param: csv_dir: string: path to directory into which the csv with values will be written
    @param: fig_dir: string: path to directory into which the filtered log figure will be written
    @param: filters: string or sequence with 'median', 'binomial' or 'mean': define what type of filter to use
    @param: remove_negs: boolean: if True then values that are not strictly positive (negatives and
            zeros) will be removed prior to filtering
    @param: log_space: boolean: if True the logs are plotted on a logarithmic x axis.
            NOTE: the observations themselves are not log-transformed before filtering.
    @param: min_depth: float: if not nan then values at or shallower than this value are removed after filtering
    @param: max_depth: float: if not nan then values at or deeper than this value are removed after filtering

    This function writes the filtered data into a csv, plots the filtered log and writes the filtering metadata into
    a text file. All outputs go into <csv_dir>/<bore_id> and <fig_dir>/<bore_id>, which are created if needed.

    Raises a TypeError if windows, min_periods or filters are neither a sequence nor a valid scalar,
    and a ValueError if a filter name is unknown or there are fewer windows/min_periods than filters.
    """
    valid_filters = ('median', 'binomial', 'mean')

    def is_integer(value):
        # Values read from a pandas table are usually numpy integers, not int
        return isinstance(value, (int, np.integer))

    # Normalise the three per-stage settings to lists
    windows = _as_list(windows, is_integer,
                       'Invalid dtype for windows. Please make an integer or list.')
    min_periods = _as_list(min_periods, is_integer,
                           'Invalid dtype for min_periods. Please make an integer or list.')
    filters = _as_list(filters, lambda value: isinstance(value, str),
                       "Invalid entry for filter. Please make it string or list from ['median', 'binomial', 'mean']")

    unknown = [name for name in filters if name not in valid_filters]
    if unknown:
        raise ValueError("Invalid entry for filter. Please make it string or list from "
                         "['median', 'binomial', 'mean']")
    if len(windows) < len(filters) or len(min_periods) < len(filters):
        raise ValueError('windows and min_periods need one entry for each of the '
                         '{} filter(s)'.format(len(filters)))

    # Open the las file
    las = lasio.read(infile)

    df_logs = las.df()

    # Get prefix for file naming: the file name without directories and without
    # a trailing .las/.LAS extension. Both path separators are accepted so
    # Windows-style paths also work on other platforms.
    basename = infile.replace('\\', '/').split('/')[-1]
    prefix = basename[:-len('.las')] if basename.lower().endswith('.las') else basename

    # Create the output directories (including missing parents) if need be
    csv_out = os.path.join(csv_dir, bore_id)
    fig_out = os.path.join(fig_dir, bore_id)
    for out_dir in (csv_out, fig_out):
        os.makedirs(out_dir, exist_ok = True)

    # Iterate through columns, removing any white spaces around the names
    columns = [x.strip() for x in cols.split(',')]

    for item in columns:

        # Create a data series from the column of interest, without nans
        series = df_logs[item].dropna()

        # Remove negative (and zero) values
        if remove_negs:
            series = series[series > 0]

        # Now filter: each stage works on the output of the previous one
        filtered = series.copy()

        for stage, filter_name in enumerate(filters):
            filtered = run_filter(filtered, windows[stage],
                                  min_periods[stage], filter_name)

        # Clip the filtered log if depth limits are assigned
        if np.isfinite(min_depth):
            filtered = filtered[filtered.index > min_depth]

        if np.isfinite(max_depth):
            filtered = filtered[filtered.index < max_depth]

        # Plot the unfiltered and filtered data side by side
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize = (8, 8), sharex = True, sharey = True)

        try:
            plot_logs(ax1, series, logplot = log_space, title = "unfiltered")
            # The y axis is shared, so inverting it once puts depth increasing downwards on both
            ax1.invert_yaxis()
            plot_logs(ax2, filtered, logplot = log_space, title = "filtered")
            ax1.set_ylabel('depth (m)')
            ax1.set_xlabel('apparent conductivity (mS/m)')
            ax2.set_xlabel('apparent conductivity (mS/m)')

            fig.savefig(os.path.join(fig_out, prefix + '_' + item + '.png'))
        finally:
            # Always release the figure, even if plotting or saving failed
            plt.close(fig)

        filtered.to_csv(os.path.join(csv_out, prefix + '_' + item + '.csv'))

    # Write out metadata file
    filename = os.path.join(csv_out, 'filtering_metadata.txt')

    with open(filename, 'w') as f:
        s= 'Downhole logs filtered on the ' + str(datetime.datetime.now()) + '\n'
        s+= 'The filtered variables are {} .\n'.format(cols)

        filter_string = ', '.join(filters)
        window_string = ', '.join([str(x) for x in windows])
        min_periods_string = ', '.join([str(x) for x in min_periods])
        s+= 'Filtering using {} filter(s) with window size {} and minimum period of {}\n'.format(filter_string, window_string, min_periods_string)
        if np.isfinite(min_depth):
            s+= 'The log were clipped above {} m\n'.format(str(min_depth))
        if np.isfinite(max_depth):
            s+= 'The log were clipped below {} m\n'.format(str(max_depth))
        f.write(s)

In [13]:
# Now we will filter some logs. Read the csv with the filtering parameters required by the
# filter_log function into jupyter lab.
# NOTE(review): this is an absolute path on one machine; adjust it before running elsewhere.

infile = r"C:\Users\PCUser\Desktop\EK_data\Boreholes\gamma_induction\EK_induction.csv"

# The table of parameters; the batch loop further down iterates over its rows
df = pd.read_csv(infile)

In [14]:
# Show the 'Infile' entry of the first row as a quick check of the loaded table
df['Infile'].iloc[0]

'C:\\Users\\PCUser\\Desktop\\EK_data\\EK_Induction_gamma\\2017_tfd_files_GA_ALT_system\\17BP01I\\17BP01I_induction_down_relogged.las'

In [16]:
# Batch mode using parameters from the spreadsheet: each row of df supplies the
# arguments for one call to filter_log.
# NOTE: the loop variables remain in the notebook namespace after the loop has
# finished; a later cell prints the values left over from the last row.
for index, row in df.iterrows():

    infile = row['Infile']
    bore_id = row['Bore_ID']
    # Run flag for this row. NOTE(review): this name shadows the builtin filter()
    filter = row['filter']
    # Two filter stages per row, each with its own window and min_periods
    filters = [row['filter_1'], row['filter_2']]
    Columns = row['Columns']
    windows = [row['window_filter_1'], row['window_filter_2']]
    min_periods = [row['min_periods_filter_1'], row['min_periods_filter_2']]
    csv_dir = row['csv_dir']
    plot_dir = row['plot_dir']
    remove_negs = row['remove_negs']
    logplot = row['logplot']
    min_depth = row['min_depth']
    max_depth = row['max_depth']
    
    # Only rows whose run flag equals 1 are processed
    if filter == 1:
        try:
            # logplot is passed positionally and lands in filter_log's log_space parameter
            filter_log(infile, bore_id, Columns, windows, min_periods,
                       csv_dir, plot_dir, filters, remove_negs, logplot,
                       min_depth = min_depth, max_depth = max_depth)
        except FileNotFoundError:
            # Report the path and carry on with the remaining rows
            print('File not found. Check this path ', infile)





File not found. Check this path  C:\Users\PCUser\Desktop\EK_data\EK_Induction_gamma\2017_tfd_files_GA_ALT_system\RN029663\RN29663_induction_down.las
File not found. Check this path  C:\Users\PCUser\Desktop\EK_data\EK_Induction_gamma\2017_tfd_files_GA_ALT_system\RN029665\RN029665_induction_down.las
File not found. Check this path  C:\Users\PCUser\Desktop\EK_data\EK_Induction_gamma\2017_tfd_files_GA_ALT_system\RN029653\RN029653_induction_down.las
File not found. Check this path  C:\Users\PCUser\Desktop\EK_data\EK_Induction_gamma\2017_tfd_files_GA_ALT_system\RN029662\RN029662_induction_down.las
File not found. Check this path  C:\Users\PCUser\Desktop\EK_data\EK_Induction_gamma\2017_tfd_files_GA_ALT_system\13BP01D\13BP01D_induction_down.las
File not found. Check this path  C:\Users\PCUser\Desktop\EK_data\EK_Induction_gamma\2017_tfd_files_GA_ALT_system\17BP07D\17BP07D_induction_down.las


Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.
Header section Parameter regexp=~P was not found.


File not found. Check this path  C:\Users\PCUser\Desktop\EK_data\EK_Induction_gamma\2017_tfd_files_GA_ALT_system\RN007412\RN007412_induction_down.las


Header section Parameter regexp=~P was not found.


In [36]:
# To demonstrate the input parameters we print, one per line, the values
# left over from the last row of inputs
print(infile, bore_id, filters, Columns, windows, min_periods,
      csv_dir, plot_dir, remove_negs, logplot, min_depth, max_depth,
      sep='\n')

C:\Users\PCUser\Desktop\EK_data\EK_Induction_gamma\2016_las_files_DGRT_system\Y10A_Ord40\Y10A_Ord40_conductivity_down.las
Ord40
['median', 'binomial']
INDUCTION_CALIBRATED
[30, 31]
[30, 31]
C:\Users\PCUser\Desktop\EK_data\Boreholes\gamma_induction\induction_output\csv_files
C:\Users\PCUser\Desktop\EK_data\Boreholes\gamma_induction\induction_output\pngs
True
True
1.97
14.16
