
This notebook demonstrates how to create an analysis-ready SpatiaLite database for borehole data. All data have been processed and filtered, and the depths have been corrected to metres below ground level. Induction and gamma data are resampled to 5 cm intervals and are stored in the same table.

Neil Symington neil.symington@ga.gov.au

In [1]:
import shapely.wkb
import shapely.wkt
from shapely.geometry import Point
import os, glob
import pandas as pd
# sqlite/spatialite
from sqlalchemy import create_engine, event, ForeignKey
from sqlalchemy import Column, Integer, String, Float, Date, Boolean
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlite3 import dbapi2 as sqlite
import sys
from pyproj import Proj, transform
import lasio
import sqlite3
import numpy as np
import numpy as np
import lasio
import matplotlib.pyplot as plt
import sys
import datetime
import math

In [2]:
# Neil Symington's local configuration
DB_ROOT = r"\\prod.lan\active\proj\futurex\East_Kimberley\Working\SharedWorkspace\Bores_working\compilation\spatialite"
SPATIALITE_PATH = r'C:\mod_spatialite-4.3.0a-win-amd64'

# Put the folder holding the mod_spatialite DLL at the front of PATH so the
# extension can be located when it is loaded. os.pathsep is ';' on Windows, so
# the result is unchanged there; the guard stops PATH from growing every time
# this cell is re-run.
_current_path = os.environ.get('PATH', '')
if SPATIALITE_PATH not in _current_path.split(os.pathsep):
    os.environ['PATH'] = SPATIALITE_PATH + os.pathsep + _current_path

In [5]:
DB_PATH = os.path.join(DB_ROOT, r"East_Kimberley_borehole_data.sqlite")

# Start from a clean slate: a database left over from a previous run is deleted
if os.path.exists(DB_PATH):
    os.remove(DB_PATH)


engine = create_engine('sqlite:///' + DB_PATH, module=sqlite, echo=False)


@event.listens_for(engine, 'connect')
def connect(dbapi_connection, connection_rec):
    """Load the mod_spatialite extension on each new DBAPI connection.

    Registered as a listener for the engine's 'connect' event.

    Parameters
    ----------
    dbapi_connection :
        The raw DBAPI (sqlite3) connection that was just opened.
    connection_rec :
        Second argument supplied by the 'connect' event; not used here.
    """
    dbapi_connection.enable_load_extension(True)
    # The extension name is a string literal, so it takes single quotes in SQL.
    # Double quotes denote an identifier and are only accepted as a string
    # through a SQLite compatibility quirk that can be compiled out.
    dbapi_connection.execute("SELECT load_extension('mod_spatialite')")


# create spatialite metadata
print('creating spatial metadata...')
engine.execute("SELECT InitSpatialMetaData(1);")

creating spatial metadata...


<sqlalchemy.engine.result.ResultProxy at 0x4c8fe48>

In [6]:
# Declarative base class that every table class in this notebook inherits from
Base = declarative_base()

class Boreholes(Base):
    """Header table ('borehole').

    Stores the borehole identifier and name, its coordinates and ground
    elevation, a set of boolean flags recording which datasets exist for the
    borehole, and the geometry held as text.
    """
    __tablename__ = 'borehole'
    # Primary key; the data tables reference it through 'borehole.borehole_id'
    borehole_id = Column(Integer, index=True, primary_key=True)
    borehole_name = Column("Borehole_name", String(20))
    easting = Column("Easting", Float)
    northing = Column("Northing", Float)
    elevation = Column("Ground_elevation_mAHD", Float)
    # Dataset availability flags
    induction = Column("Induction_acquired", Boolean)
    gamma = Column("Gamma_acquired", Boolean)
    javelin = Column("Javelin_acquired", Boolean)
    hylogger_core = Column("Hylogger_acquired_on_core", Boolean)
    hylogger_chips = Column("Hylogger_acquired_on_chips", Boolean)
    lithology = Column("Lithology_available", Boolean)
    ECpH = Column("EC_pH_acquired", Boolean)
    swl = Column("SWL_available", Boolean)
    # Geometry stored as a plain string (no length given)
    geometry = Column(String)

    
class Induction_gamma_data(Base):
    """Data table ('induction_gamma_data') of induction and gamma log samples.

    Each row holds a depth, a conductivity value, the gamma channels
    (calibrated gamma, K, U, Th, GR) and the id of the borehole it belongs to.
    """
    __tablename__ = 'induction_gamma_data'
    table_id = Column(Integer, index=True, primary_key=True)
    depth = Column("Depth", Float)
    conductivity = Column("Conductivity", Float)
    gamma_calibrated = Column("Gamma_calibrated", Float)
    K = Column("K", Float)
    U = Column("U", Float)
    Th = Column("Th", Float)
    GR = Column("GR", Float)
    
    # Link back to the header table
    borehole_id = Column(Integer, ForeignKey('borehole.borehole_id'))
    borehole_header = relationship("Boreholes")

class Borehole_NMR_data(Base):
    """Data table ('boreholeNMR_data') of borehole NMR samples.

    Each row holds a depth, the total / clay / capillary / free water content
    values, T2, the 'K_sdr' value and the id of the borehole it belongs to.
    """
    __tablename__ = 'boreholeNMR_data'
    table_id = Column(Integer, index=True, primary_key=True)
    depth = Column("Depth", Float)
    totalf = Column("Total_water_content", Float)
    clayf = Column("Clay_water_content", Float)
    capf = Column("Capillary_water_content", Float)
    free = Column("Free_water_content", Float)
    T2 = Column("T2", Float)
    # Attribute is called K but the database column is 'K_sdr'
    K = Column("K_sdr", Float)
    
    # Link back to the header table
    borehole_id = Column(Integer, ForeignKey('borehole.borehole_id'))
    borehole_header = relationship("Boreholes")
    
class Lithology(Base):
    """Data table ('borehole_lithology') of lithology intervals.

    Each row holds a from/to depth pair, a lithology name, a free-text
    description, a Munsell colour string and the id of the borehole it
    belongs to.
    """
    __tablename__ = 'borehole_lithology'
    table_id = Column(Integer, index=True, primary_key=True)
    depth_from = Column("Depth_from", Float)
    depth_to = Column("Depth_to", Float)
    lithology = Column("Lithology_name", String(40))
    lithdescription = Column("Lithology_description", String(250))
    colour = Column("Munsell_colour", String(10))
    
    # Link back to the header table
    borehole_id = Column(Integer, ForeignKey('borehole.borehole_id'))
    borehole_header = relationship("Boreholes")

class EC_pH(Base):
    """Data table ('pore_fluid_EC_pH') of EC and pH measurements.

    Each row holds a depth, an EC value, a pH value and the id of the
    borehole it belongs to.
    """
    __tablename__ = 'pore_fluid_EC_pH'
    table_id = Column(Integer, index=True, primary_key=True)
    depth = Column("Depth", Float)
    EC = Column("EC", Float)
    pH = Column("pH", Float)
    
    # Link back to the header table
    borehole_id = Column(Integer, ForeignKey('borehole.borehole_id'))
    borehole_header = relationship("Boreholes")
    
    
class SWL(Base):
    """Data table ('standing_water_level') of standing water level readings.

    Each row holds the measurement date, a depth, the name of whoever took
    the measurement and the id of the borehole it belongs to.
    """
    __tablename__ = 'standing_water_level'
    table_id = Column(Integer, index=True, primary_key=True)
    date = Column("Date", Date)
    depth = Column("Depth", Float)
    Measurer = Column("Measurer", String(30))
    
    # Link back to the header table
    borehole_id = Column(Integer, ForeignKey('borehole.borehole_id'))
    borehole_header = relationship("Boreholes")
    
    

In [7]:
# Create, in the database behind `engine`, every table declared on Base
Base.metadata.create_all(engine)

In [8]:
# Read the borehole header table
infile = os.path.join(DB_ROOT, "Boreholes_header.csv")

df_header = pd.read_csv(infile)

# Availability flags all start at 0; later cells set them to 1 wherever the
# matching dataset is found for a borehole.
for flag_column in ("Induction_acquired",
                    "Gamma_acquired",
                    "Javelin_acquired",
                    "Hylogger_chips_acquired",
                    "Hylogger_core_acquired",
                    "lithology_description",
                    "EC_pH_acquired",
                    'SWL_available'):
    df_header[flag_column] = 0

# Parse each WKT geometry once and read both coordinates off the parsed point
header_points = [shapely.wkt.loads(wkt_text) for wkt_text in df_header["geometry"]]
df_header['easting'] = [point.x for point in header_points]
df_header['northing'] = [point.y for point in header_points]

In [9]:
def update_availability_flag(df_header, channels, eno):
    """Flag induction and/or gamma data as available for one borehole.

    Parameters
    ----------
    df_header : pandas.DataFrame
        Header table with an "ENO" column and the "Induction_acquired" and
        "Gamma_acquired" flag columns. It is modified in place.
    channels : iterable of str
        Names of the log channels found for the borehole (e.g. the columns
        of the dataframe read from its LAS file).
    eno :
        Value identifying the borehole in the "ENO" column.

    Returns
    -------
    pandas.DataFrame
        The same `df_header` object, with the flags set to 1 on every row
        whose "ENO" equals `eno`. Nothing changes if no row matches.
    """
    # Compare upper-cased names so the thorium channel is recognised whether
    # it is spelt 'TH' (as in the LAS-derived columns) or 'Th'.
    available = {str(channel).upper() for channel in channels}

    # A boolean mask with .loc handles zero, one or several matching rows;
    # .at is a scalar accessor and cannot take an Index of labels.
    is_this_bore = df_header["ENO"] == eno

    # Check induction
    if available & {"INDUCTION_CALIBRATED", "INDUCTION_BOREHOLE_COMPENSATED"}:
        df_header.loc[is_this_bore, "Induction_acquired"] = 1

    # Check gamma
    if available & {"GAMMA_CALIBRATED", "GR", "K", "U", "TH"}:
        df_header.loc[is_this_bore, "Gamma_acquired"] = 1

    return df_header
    

In [10]:
# Now let's read in the induction gamma data

las_dir = r"\\prod.lan\active\proj\futurex\East_Kimberley\Data\Processed\Geophysics\Induction_gamma\EK_filtered_induction_gamma"

# One dataframe per LAS file is collected here and concatenated once at the
# end; growing a dataframe with append inside the loop copies all of the data
# on every iteration.
las_frames = []

# Glob on the full path instead of changing the working directory, and sort
# the matches so the row order is the same on every run.
for las_path in sorted(glob.glob(os.path.join(las_dir, '*.LAS'))):
    las = lasio.read(las_path)
    df_logs = las.df()

    # Get the eno and ref datum
    datum = las.well.APD.value
    eno = las.well.UWI.value

    # Update the df_header dataframe with the inclusion or otherwise of
    # induction and gamma
    df_header = update_availability_flag(df_header, df_logs.columns, eno)

    df_logs["borehole_id"] = eno

    # Convert the depth reference to mBGL by subtracting the datum value
    # from the depth index
    df_logs["Depth_mBGL"] = df_logs.index - datum

    las_frames.append(df_logs)

if las_frames:
    # sort=True keeps the columns in sorted order and avoids the
    # "sort" FutureWarning raised when the files have different channels.
    df_indgam = pd.concat(las_frames, sort=True)
else:
    # No LAS files found: fall back to an empty frame with the key columns
    df_indgam = pd.DataFrame(columns=["borehole_id", "Depth_mBGL"])

# Drop the index name so the original depth index ends up in a column called
# 'index' after the reset.
df_indgam = df_indgam.rename_axis(None).reset_index()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


In [11]:
# Show which columns the combined induction/gamma dataframe ended up with
df_indgam.columns

Index(['index', 'Depth_mBGL', 'GAMMA_CALIBRATED', 'GR',
       'INDUCTION_BOREHOLE_COMPENSATED', 'INDUCTION_CALIBRATED', 'K', 'TH',
       'U', 'borehole_id'],
      dtype='object')

In [12]:
# Convert both induction channels to S/m by dividing the stored values by 1000.
# NOTE: re-running this cell divides the values again.
for induction_column in ('INDUCTION_BOREHOLE_COMPENSATED', 'INDUCTION_CALIBRATED'):
    df_indgam[induction_column] = df_indgam[induction_column].values / 1000.

In [13]:
# Now we import the Javelin (borehole NMR) data from the compiled csv file
infile = r"\\prod.lan\active\proj\futurex\East_Kimberley\Working\SharedWorkspace\Bores_working\compilation\borehole_NMR\bNMR_data_compiled.csv"

df_bnmr_data = pd.read_csv(infile)

In [14]:
# Set the Javelin flag on every header row whose ENO appears in the NMR data

bnmr_enos = df_bnmr_data.borehole_id.unique()

has_bnmr = df_header['ENO'].isin(bnmr_enos)
df_header.loc[has_bnmr, "Javelin_acquired"] = 1

In [15]:
# Read the lithology log

infile = r"R:\futurex\East_Kimberley\Working\SharedWorkspace\Bores_working\compilation\sonic_lithology\EK_lithlog_sonic_duplicates_removed.csv"

df_lithology = pd.read_csv(infile)

lithology_enos = df_lithology.ENO.unique()

# The header flag becomes 1 for every site that has lithology data
has_lithology = df_header['ENO'].isin(lithology_enos)
df_header.loc[has_lithology, "lithology_description"] = 1
    

In [16]:
# Now we bring in the hylogger data
hylog_dir = r"\\prod.lan\active\proj\futurex\East_Kimberley\Working\SharedWorkspace\Bores_working\compilation\hylogger"

# The core and chip results sit in two csv files in the same folder
df_hylogs, df_hychips = (
    pd.read_csv(os.path.join(hylog_dir, csv_name))
    for csv_name in ("EK_hylogg_results_core.csv", "EK_hylogg_results_chips.csv")
)

  interactivity=interactivity, compiler=compiler, result=result)


In [17]:
# Preview only the first rows; the frame is very wide and dumping all of it
# bloats the notebook output.
df_hylogs.head(10)

Unnamed: 0,AMPHIBOLE_(U_TIR_TSA707_Group),Actinolite_(U_TIR_TSA707_Mineral),Albite_(U_TIR_TSA707_Mineral),Alunite-K_(U_TIR_TSA707_Mineral),Alunite-Na_(U_TIR_TSA707_Mineral),AmphiboleML48_(U_TIR_TSA707_Mineral),Andesine_(U_TIR_TSA707_Mineral),Apatite_(U_TIR_TSA707_Mineral),Aspectral_(U_TIR_TSA707_Mineral),Bytownite_(U_TIR_TSA707_Mineral),...,Prehnite_(U_SWIR_TSA705_Mineral),Pyrophyllite_(U_SWIR_TSA705_Mineral),SMECTITE_(U_SWIR_TSA705_Group),SULPHATE_(U_SWIR_TSA705_Group),Sample_Comment_y,Saponite_(U_SWIR_TSA705_Mineral),Siderite_(U_SWIR_TSA705_Mineral),Topaz_(U_SWIR_TSA705_Mineral),Tremolite_(U_SWIR_TSA705_Mineral),WHITE-MICA_(U_SWIR_TSA705_Group)
0,0.0,0.0,0.373394,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0.0,0.0,0.0,0.196341,T=0001 L=1 P=6 D=125.000006 X=46.263977 H=18BP01D,0.0,0.0,0.0,0.0,0.370663
1,0.0,0.0,0.366011,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0.0,0.0,0.0,0.135827,T=0001 L=1 P=7 D=125.009529 X=54.263977 H=18BP01D,0.0,0.0,0.0,0.0,0.555458
2,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0.0,0.0,0.0,0.154842,T=0001 L=1 P=8 D=125.019051 X=62.263977 H=18BP01D,0.0,0.0,0.0,0.0,0.503736
3,0.0,0.0,0.397523,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0.0,0.0,0.0,0.143289,T=0001 L=1 P=9 D=125.028574 X=70.263977 H=18BP01D,0.0,0.0,0.0,0.0,0.532410
4,0.0,0.0,0.389085,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0.0,0.0,0.0,0.169446,T=0001 L=1 P=10 D=125.038097 X=78.263977 H=18B...,0.0,0.0,0.0,0.0,0.535842
5,0.0,0.0,0.340442,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0.0,0.0,0.0,0.159602,T=0001 L=1 P=11 D=125.047620 X=86.263977 H=18B...,0.0,0.0,0.0,0.0,0.501905
6,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0.0,0.0,0.0,0.096739,T=0001 L=1 P=12 D=125.057143 X=94.263977 H=18B...,0.0,0.0,0.0,0.0,0.478097
7,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0.0,0.0,0.0,0.244240,T=0001 L=1 P=13 D=125.066666 X=102.263977 H=18...,0.0,0.0,0.0,0.0,0.490676
8,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0.0,0.0,0.0,0.288607,T=0001 L=1 P=14 D=125.076189 X=110.263977 H=18...,0.0,0.0,0.0,0.0,0.342398
9,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0.0,0.0,0.0,0.300658,T=0001 L=1 P=15 D=125.085712 X=118.263977 H=18...,0.0,0.0,0.0,0.0,0.699342


In [18]:
# Set the hylogger flags (core and chips) on the header table

hylog_core_enos = df_hylogs.Borehole_ENO.unique()
hylog_chips_enos = df_hychips.Borehole_ENO.unique()

for flag_column, flagged_enos in (("Hylogger_core_acquired", hylog_core_enos),
                                  ("Hylogger_chips_acquired", hylog_chips_enos)):
    df_header.loc[df_header['ENO'].isin(flagged_enos), flag_column] = 1
    


In [19]:
# Read the EC / pH measurements
df_ECpH = pd.read_csv(r"\\prod.lan\active\proj\futurex\East_Kimberley\Working\SharedWorkspace\Bores_working\compilation\EC_pH\EC_pH_sonic.csv")

ECpH_enos = df_ECpH['Borehole_eno'].values

# Flag every header row whose ENO has EC / pH data
has_ECpH = df_header['ENO'].isin(ECpH_enos)
df_header.loc[has_ECpH, "EC_pH_acquired"] = 1

In [20]:
# Convert the EC values to S/m by scaling the stored values by 0.1.
# NOTE: re-running this cell scales the values again.

df_ECpH['EC Value'] = 0.1 * df_ECpH['EC Value'].values

In [21]:
# Read the standing water level (SWL) measurements
df_swl = pd.read_csv(r"\\prod.lan\active\proj\futurex\East_Kimberley\Working\SharedWorkspace\Bores_working\compilation\SWLs\EK_adjusted_SWL.csv")

swl_enos = df_swl['ENO'].values

# Turn the day/month/year strings into datetime values
measured_on = pd.to_datetime(df_swl['Date Measured'], dayfirst = True,
                             format = "%d/%m/%Y")
df_swl['Date Measured'] = measured_on

# Flag every header row whose ENO has at least one SWL measurement
has_swl = df_header['ENO'].isin(swl_enos)
df_header.loc[has_swl, "SWL_available"] = 1

In [22]:
# With everything loaded, start building the records for the spatialite
# database: one Boreholes record per row of the header table.

all_bores = [
    Boreholes(
        borehole_id=header_row['ENO'],
        borehole_name=header_row['BOREHOLE_NAME'],
        easting=header_row['easting'],
        northing=header_row['northing'],
        elevation=header_row['ground_elevation_(mAHD)'],
        induction=header_row['Induction_acquired'],
        gamma=header_row['Gamma_acquired'],
        javelin=header_row['Javelin_acquired'],
        hylogger_chips=header_row['Hylogger_chips_acquired'],
        hylogger_core=header_row['Hylogger_core_acquired'],
        lithology=header_row['lithology_description'],
        ECpH=header_row["EC_pH_acquired"],
        swl=header_row["SWL_available"],
        geometry=header_row['geometry'],
    )
    for _, header_row in df_header.iterrows()
]

In [23]:
# One Borehole_NMR_data record per row of the NMR dataframe; the dataframe
# index is used as the table id.

all_nmr_data = [
    Borehole_NMR_data(
        table_id=row_label,
        depth=nmr_row['Depth_mBGL'],
        totalf=nmr_row["Total_water_content"],
        clayf=nmr_row["Clay_water_content"],
        capf=nmr_row["Capillary_water_content"],
        free=nmr_row["Free_water_content"],
        T2=nmr_row["T2"],
        K=nmr_row['Ksdr'],
        borehole_id=nmr_row['borehole_id'],
    )
    for row_label, nmr_row in df_bnmr_data.iterrows()
]

In [24]:
# One Induction_gamma_data record per row of the induction/gamma dataframe;
# the dataframe index is used as the table id.

all_indgam_data = []

for row_label, log_row in df_indgam.iterrows():

    # Conductivity is taken from whichever channel has a value: the
    # borehole-compensated one first, then the calibrated one, otherwise NaN.
    compensated = log_row['INDUCTION_BOREHOLE_COMPENSATED']
    if pd.notnull(compensated):
        conductivity = compensated
    else:
        calibrated = log_row['INDUCTION_CALIBRATED']
        conductivity = calibrated if pd.notnull(calibrated) else np.nan

    all_indgam_data.append(
        Induction_gamma_data(
            table_id=row_label,
            depth=log_row['Depth_mBGL'],
            conductivity=conductivity,
            gamma_calibrated=log_row['GAMMA_CALIBRATED'],
            K=log_row["K"],
            U=log_row["U"],
            Th=log_row["TH"],
            GR=log_row['GR'],
            borehole_id=log_row['borehole_id'],
        )
    )

In [25]:
# One Lithology record per row of the lithology dataframe; the dataframe
# index is used as the table id.
all_lithology_data = [
    Lithology(
        table_id=row_label,
        depth_from=lith_row['Depth_from'],
        depth_to=lith_row["Depth_to"],
        lithology=lith_row["Lithology_name"],
        lithdescription=lith_row['DESCRIPTION'],
        colour=lith_row['colour'],
        borehole_id=lith_row['ENO'],
    )
    for row_label, lith_row in df_lithology.iterrows()
]

In [26]:
# One EC_pH record per row of the EC/pH dataframe; the dataframe index is
# used as the table id.
all_EC_pH_data = [
    EC_pH(
        table_id=row_label,
        depth=ecph_row['Depth'],
        EC=ecph_row["EC Value"],
        pH=ecph_row['pH'],
        borehole_id=ecph_row['Borehole_eno'],
    )
    for row_label, ecph_row in df_ECpH.iterrows()
]

In [27]:
# One SWL record per row of the standing water level dataframe; the dataframe
# index is used as the table id.
all_swl_data = [
    SWL(
        table_id=row_label,
        depth=swl_row['SWL_m'],
        date=swl_row["Date Measured"],
        Measurer=swl_row['Who_Measured'],
        borehole_id=swl_row['ENO'],
    )
    for row_label, swl_row in df_swl.iterrows()
]

In [28]:
# Build a session factory bound to the engine and open the session used to
# write the records to the database
from sqlalchemy.orm import sessionmaker
Session = sessionmaker(bind=engine)
session = Session()

In [29]:
# Stage every list of records on the session and write them in one transaction
record_batches = (all_bores,
                  all_nmr_data,
                  all_indgam_data,
                  all_lithology_data,
                  all_EC_pH_data,
                  all_swl_data)

try:
    for record_batch in record_batches:
        session.add_all(record_batch)

    session.commit()
except Exception:
    # Roll back so a failed load leaves the session usable, then re-raise so
    # the error is still reported.
    session.rollback()
    raise

In [30]:
# Create the spatialite geometry column

# add a Spatialite geometry column called 'geom' to the borehole table, using EPSG 28352,
# data type POINT and 2 dimensions (x, y)
engine.execute("SELECT AddGeometryColumn('borehole', 'geom', 28352, 'POINT', 'XY', 1);")

# update the yet empty geom column by parsing the text held in the geometry column with
# GeomFromText (i.e. as well-known text, not well-known binary) into Spatialite geometry objects
engine.execute("UPDATE borehole SET geom=GeomFromText(geometry, 28352);")

<sqlalchemy.engine.result.ResultProxy at 0xaf16b70>

In [29]:
# The hylogging data go into the database straight from the dataframes rather
# than through declarative classes like the other tables: the frames have too
# many columns for writing a class by hand to be practical.

for table_name, hylog_frame in (("Hylogging_data_from_core", df_hylogs),
                                ("Hylogging_data_from_chips", df_hychips)):
    hylog_frame.to_sql(table_name, engine, if_exists='replace', index=False)

In [31]:
# Create a one-row metadata table recording the unit of each quantity.
# Keys become the column names of the table and are kept exactly as they were.
unit_lookup = {"Depths": 'metres below ground level',
               "Conductivity": "S/m",
               "GAMMA_CALIBRATED": "counts per second",
               # API stands for American Petroleum Institute (not "Index")
               "GR": "American Petroleum Institute (API) units",
               "U": "ppm",
               "Th": "ppm",
               "K": "%",
               "water content": "fraction",
               # NOTE(review): the NMR column is called 'K_sdr' / 'Ksdr'
               # elsewhere in this notebook; 'Ksd' looks like a typo - confirm
               # before renaming, as consumers may rely on this key.
               "Ksd": "metres per day",
               "EC": "S/m"}

df_metadata = pd.DataFrame(data={quantity: [unit] for quantity, unit in unit_lookup.items()})


In [32]:
# Write the units metadata to a table called "Units", replacing it if it already exists
df_metadata.to_sql("Units", engine, if_exists="replace", index=False)