### Script to retrieve element names and unit suffixes from non-hyperspectral datasets in the NVCL Data Services

Acknowledgement: Adapted from Python code written by Shane Mule

In [1]:
from types import SimpleNamespace
import yaml
import json

from nvcl_kit.reader import NVCLReader
from nvcl_kit.param_builder import param_builder

import re

from periodictable import elements

In [2]:
def find_nonhyperspectral(nvcl_id_list, provider, reader):
    ''' Extracts the names of the non-hyperspectral datasets from NVCL and
        cleans them up into element names and unit suffixes

        :param nvcl_id_list: list of NVCL ids
        :param provider: provider name e.g. 'vic', 'tas' etc. (not used by the \
                         function itself, kept so existing callers still work)
        :param reader: NVCLReader object initialised for the 'provider'
        :returns: tuple (geophysics, elements, suffixes), each one a set of \
                  strings: the raw geophysics log names, the cleaned-up \
                  element names and the leftover unit suffixes
    '''
    # These patterns do not change between logs, so build them once up front
    # rather than re-joining the whole periodic table for every log entry
    symbols = '|'.join(str(x) for x in elements)
    geophys_re = re.compile(r"(magsus|mag sus|cond)", re.IGNORECASE)
    keyword_re = re.compile(r"ppm|pct|%|per|arsen|Sillimanite|PbZn", re.IGNORECASE)
    # Log name is a bare element symbol, or a symbol followed by whitespace
    # and anything else
    symbol_re = re.compile(r"^(" + symbols + r")(\s+.*)?$", re.IGNORECASE)

    geophys_set = set()
    raw_alg_set = set()
    for nvcl_id in nvcl_id_list:
        logs_data_list = reader.get_logs_data(nvcl_id)
        if not logs_data_list:
            # No NVCL data for this id
            continue
        for ldl in logs_data_list:
            # Logs of type '1' are skipped
            # NOTE(review): presumably '1' marks the hyperspectral logs - confirm
            if not hasattr(ldl, 'log_name') or ldl.log_type == '1':
                continue
            # Geophysics
            if geophys_re.search(ldl.log_name):
                geophys_set.add(ldl.log_name)
            # Elements (checked independently: a log may count as both)
            if keyword_re.search(ldl.log_name) or symbol_re.search(ldl.log_name):
                raw_alg_set.add(ldl.log_name)

    alg_list = list(raw_alg_set)

    # Reduce each raw log name to its leading name
    ele_list = []
    for alg in alg_list:
        # Drop the unit keywords
        ele = re.sub(r'ppm|pct|per', '', alg, flags=re.IGNORECASE)
        # Keep only the first alphanumeric run
        ele = re.sub(r'([A-Za-z0-9]+)(_| )*(.*)', r'\1', ele)
        # Strip a trailing '0', '1' or 'SFA' marker
        ele = re.sub(r'(\w+)(0|1|SFA)', r'\1', ele)
        ele_list.append(ele)

    # The suffix is what remains of the raw name once the element is removed
    suff_list = [a.replace(b, '') for a, b in zip(alg_list, ele_list)]
    # Suffixes that start with an underscore get a leading space
    suff_list = [re.sub(r'^(_.*)', r' \1', suff) for suff in suff_list]

    ele_set = set()
    for ele in ele_list:
        ele = re.sub(r'Arsen$', 'Arsenic', ele, flags=re.IGNORECASE)
        if not ele:
            # A log named only after a unit (e.g. 'ppm') leaves no element
            # name behind; skip it instead of failing on ele[0]
            continue
        # Upper-case the first character, lower-case the second, keep the rest
        ele_set.add(ele[:1].upper() + ele[1:2].lower() + ele[2:])

    return geophys_set, ele_set, set(suff_list)

In [3]:
# NVCL service providers to query, in the order they are processed
provider_list = [
    'nsw',
    'csiro',
    'tas',
    'vic',
    'qld',
    'wa',
    'nt',
    'sa',
]

In [4]:
# Loop over all the service providers
for provider in provider_list:
    param = param_builder(provider, max_boreholes=200)
    if not param:
        print(f"Cannot build parameters: {param}")
        # Nothing to initialise a reader with, move on to the next provider
        continue

    # Initialise reader
    reader = NVCLReader(param)
    if not reader.wfs:
        print(f"ERROR! Cannot contact WFS service for {provider}")
        # Without a WFS connection there is nothing to query
        continue

    # Get list of NVCL ids
    nvcl_id_list = reader.get_nvcl_id_list()

    # Skip this provider if no nvcl ids found
    if not nvcl_id_list:
        print(f"ERROR! No NVCL ids for {provider}")
        continue

    # Find non-hyperspectral info
    geophys_set, ele_set, suff_set = find_nonhyperspectral(nvcl_id_list, provider, reader)

    # Report the results for this provider
    print(f"\n\n{provider.upper()}:\n")
    print(f"Geophysics: {geophys_set}")
    print(f"Elements: {ele_set}")
    print(f"Suffixes: {suff_set}")



NSW:

Geophysics: {'Cond:Mhos/m'}
Elements: {'Co', 'Sillimanite', 'Fe', 'Sand', 'Au', 'Sandstone', 'Coal', 'Bi', 'As', 'Cu', 'Clay', 'Ag', 'Cr', 'Cd', 'Pb', 'Ni', 'Siltstone', 'Zn', 'Sb', 'Limestone', 'Pd', 'Sn', 'Claystone', 'W', 'Mn', 'Pt', 'Ti'}
Suffixes: {' (Ti)', '', 'ppm', ' ppm', ' (%)', ' (ppm)', 'pct', ' %', ' %(%)'}


CSIRO:

Geophysics: set()
Elements: set()
Suffixes: set()


TAS:

Geophysics: {'Mag sus', 'Mag Sus', 'Magsus', 'MAGSUS (SIx10 -5)'}
Elements: {'Na', 'Ce', 'P', 'Te', 'Co', 'Yb', 'Mt', 'QtzFsp', 'HrEE', 'Fe', 'Q', 'S', 'A', 'Au', 'Y', 'Zr', 'Sm', 'Bi', 'Rb', 'Cu', 'As', 'Ag', 'Se', 'Thite', 'Cr', 'K', 'Cd', 'Pb', 'Fsp', 'Mo', 'Ni', 'Ba', 'Sr', 'Qtz', 'U', 'Zn', 'Th', 'Ca', 'Sc', 'Sb', 'B', 'Cs', 'Ir', 'Lu', 'Ta', 'Ser', 'F', 'Sn', 'Br', 'Cb', 'Alkali', 'Mg', 'Nb', 'V', 'W', 'Mn', 'Hf', 'Plagioclase', 'La', 'Eu', 'Chl', 'An', 'Ti', 'O'}
Suffixes: {'', ' 10,100min d', ' av% (AP)', ' av% (KCa)', ' triangular OQ', '/Oth%', '/REE% 978d+915d/740d+802d+870d%', '/QF%',