This code opens pmaps from files and uses ipywidgets to do some basic interpretation on the Daly River data set.

TODO in 2020 add interpretation tool

Neil Symington
neil.symington@ga.gov.au

In [1]:
%matplotlib widget

In [2]:
import geopandas as gpd
import numpy as np
import pandas as pd
import aseg_gdf2
from shapely.geometry import Point
from shapely.wkt import loads
from scipy.io import loadmat
from scipy import spatial
import matplotlib.pyplot as plt
import rasterio
import netCDF4
import os

In [3]:
# Function for finding the nearest point from geopandas frame
def find_nearest_point_ind(gdf, pt):
    
    dist = gdf.distance(pt)
    # retrun the nearest index and the distance
    return dist.idxmin(), dist.min()

def extract_pmap_info(D, near_fid):
    """
    @param: D: python dictionary from loadmat function in scipy
    
    returns
    a more usebal dictionary
    """

    freq = D['M']['f'][0,0]

    pdf = freq / freq.sum(axis =1)[0]

    pdf[pdf == 0] = np.nan

    # Get the changepoint histogram

    cp = D['M']['cp'][0,0].flatten()

    cp_pdf = D['M']['cp'][0,0] / freq.sum(axis =1)[0]

    # Get the extent

    extent = [D['M']['vmin'][0,0][0,0], D['M']['vmax'][0,0][0,0], D['M']['pmax'][0,0][0,0], D['M']['pmin'][0,0][0,0]]

    # Get the information from the datafile

    p10 = near_fid[[x for x in gdf_AEM.columns if x.startswith('conductivity_p10')]].astype(np.float).values
    p50 = near_fid[[x for x in gdf_AEM.columns if x.startswith('conductivity_p50')]].astype(np.float).values
    p90 = near_fid[[x for x in gdf_AEM.columns if x.startswith('conductivity_p90')]].astype(np.float).values
    mean = near_fid[[x for x in gdf_AEM.columns if x.startswith('conductivity_mean')]].astype(np.float).values
    depth_cells = near_fid[[x for x in gdf_AEM.columns if x.startswith('depth')]].astype(np.float).values

    cond_cells = np.linspace(D['M']['vmin'][0,0][0,0], D['M']['vmax'][0,0][0,0], D['M']['nvcells'][0,0][0,0])
    
    
    laybins = D['M']['lhist'][0,0][0,0][1].flatten()
    lay_pob = D['M']['lhist'][0,0][0,0][2].flatten() / freq.sum(axis =1)[0]

    nsample = D['M']['nsample'][0,0][0,0]

    ndata = int(D['M']['ndata'][0,0][0,0])
    nsamples = np.int(D['M']['nsample'][0,0][0,0])
    nchains = np.int(D['M']['nchain'][0,0][0,0])
    burnin = int(D['M']['nburnin'][0,0][0,0])
    
    misfit = {}
    sample_no = {}
    
    for i in range(nchains):
        misfit[i] = D['M']['conv'][0,0]['misfit'][0,i].flatten()
        sample_no[i] = D['M']['conv'][0,0]['sample'][0,i].flatten()
        
    
    return {'conductivity_pdf': pdf, "change_point_pdf": cp_pdf, "conductivity_extent": extent,
           'cond_p10': p10, 'cond_p50': p50, 'cond_p90': p90, 'cond_mean': mean, 'depth_cells': depth_cells,
           'nlayer_bins': laybins, 'nlayer_prob': lay_pob, 'nsamples': nsample, 'ndata': ndata,
           "nchains": nchains, 'burnin': burnin, 'misfit': misfit, 'sample_no': sample_no, 'cond_cells': cond_cells}

def extract_data(near_fid):
    

    infile = near_fid['matfile']

    D = extract_pmap_info(loadmat(infile), near_fid)
    
    D['easting'] = near_fid['easting ']
    D['northing'] =  near_fid['northing ']
    D['fid'] = near_fid['fiducial ']
    
    # Get point elevation from grid
    point_elev = next(dataset.sample([[D['easting'],D['northing']]]))[0]

    # Spatial query to find nearest fiducial

    lci_coords = np.column_stack((lci_dat['easting'][:],
                                 lci_dat['northing'][:]))

    tree = spatial.KDTree(lci_coords.data)

    dist, ind = tree.query([D['easting'], D['northing']])

    D['elevation'] = lci_dat['elevation'][ind]

    D['lci_cond'] = lci_dat['conductivity'][ind]
    D['lci_depth_top'] = lci_dat['layer_top_depth'][ind]

    D['lci_doi'] = lci_dat['depth_of_investigation'][ind]

    D['line'] = near_fid['line ']

    return D



def HE_plot(D, outfile = None):
    fig = plt.figure(figsize = (12,10))

    fig.canvas.layout.width = '12in'
    fig.canvas.layout.height= '8in'

    ax1 = fig.add_axes([0.05, 0.2, 0.35, 0.7])
    ax2 = fig.add_axes([0.45, 0.2, 0.2, 0.7])
    ax3 = fig.add_axes([0.72, 0.2, 0.2, 0.35])
    ax4 = fig.add_axes([0.72, 0.6, 0.2, 0.3])
    cbar_ax = fig.add_axes([0.05, 0.1, 0.35, 0.02])


    # Plot probability map
    im = ax1.imshow(D['conductivity_pdf'], extent = D['conductivity_extent'],
                    aspect = 'auto', cmap = 'rainbow')
    #  PLot the median, and percentile plots
    ax1.plot(np.log10(D['cond_p10']), D['depth_cells'], c = 'k',linestyle='dashed', label = 'p10')
    ax1.plot(np.log10(D['cond_p90']), D['depth_cells'], c = 'k',linestyle='dashed', label = 'p90')
    ax1.plot(np.log10(D['cond_p50']), D['depth_cells'], c = 'k',label = 'p50')
    ax1.plot(np.log10(D['cond_mean']), D['depth_cells'], c = 'grey',label = 'mean')

    # for lci layered model we do some processing
    lci_expanded = np.zeros(shape=2 * len(D['lci_cond']) + 1,
                                 dtype=np.float)

    lci_expanded[1:] = np.repeat(D['lci_cond'], 2)

    depth_expanded = (np.max(D['lci_depth_top']) + 10) * np.ones(shape=len(lci_expanded),
                                                            dtype=np.float)

    depth_expanded[:-1] = np.repeat(D['lci_depth_top'], 2)

    ax1.plot(np.log10(lci_expanded), depth_expanded, c = 'pink',
             linestyle = 'dashed', label = 'lci')
    ax1.plot(ax1.get_xlim(), [D['lci_doi'], D['lci_doi']], c = 'pink',
             label = 'LCI doi')
    ax1.set_title('probability map')
    ax1.set_ylabel('depth (mBGL)')
    ax1.set_xlabel('log10 conductivity (S/m)')
    ax1.grid(which = 'both')
    ax1.set_xlim(D['conductivity_extent'][0], D['conductivity_extent'][1])
    ax1.set_ylim(D['conductivity_extent'][2], D['conductivity_extent'][3])
    ax1.legend()

    ax2.plot(D['change_point_pdf'], D['depth_cells'], label = 'P(change point)')
    ax2.set_ylim(ax2.get_ylim()[::-1])
    ax2.set_yticks(np.arange(0, 500, 20.))
    ax2.set_title('change point probability')
    ax2.set_ylim(D['conductivity_extent'][2], D['conductivity_extent'][3])
    ax2.legend()
    ax2.grid(which = 'both')

    #ax3.bar(D['nlayer_bins'], height = D['nlayer_prob'])
    #ax3.set_xlabel ("no of layers")
    ax3.imshow(mag, extent = [dataset.bounds[0], dataset.bounds[2],
                                     dataset.bounds[1], dataset.bounds[3]],
              cmap= 'Greys', vmin = -5., vmax = 5.)
    ax3.plot(D['easting'],D['northing'], 'ro')
    # Ax3 will be our location

    for item in D['misfit'].keys():
        sample = D['sample_no'][item]
        misfits = D['misfit'][item]
        ax4.plot(sample, misfits/D['ndata'])

    ax4.plot([1, D['nsamples']], [1,1], 'k')
    ax4.plot([D['burnin'], D['burnin']],[0.1,1e4], 'k')
    ax4.set_xlim([1, D['nsamples']])
    ax4.set_ylim(0.1, 1e4)

    ax4.set_xscale('log')
    ax4.set_yscale('log')

    ax4.set_xlabel("sample #")
    ax4.set_ylabel("Normalised misfit")

    fig.colorbar(im, cax=cbar_ax, orientation='horizontal')
    
    if outfile is not None:
        indir = r"C:\Temp"
        fname = os.path.join(indir, str(D['fid']) + "_" + str(D['line']) + '.png')
    else:
        fname =  outfile
    
    return fig



In [14]:
# Open geodataframe to extract the pmap file from the fiducial

infile = r"C:\Users\PCUser\Desktop\NSC_data\data\AEM\HE\garjmcmc\combined\rjmcmc_map.csv"

df = pd.read_csv(infile)

# Hack!
df.rename(columns = {'geometry': 'geom'}, inplace = True)

geom = [loads(p) for p in df['geom']]

df['geometry'] = geom

gdf_AEM = gpd.GeoDataFrame(df)

# Now lets join on the actual conductivity data

infile = r"C:\Users\PCUser\Desktop\NSC_data\data\AEM\HE\garjmcmc\combined\rjmcmc"

# Read in the data and convert to geodataframe

dat = aseg_gdf2.read(infile)

df = dat.df()

cols = [x for x in df.columns if "[" in x] + ['fiducial ']

gdf_AEM = pd.merge(gdf_AEM, df[cols], on = 'fiducial ')

# Hack to remove duplicates

gdf_AEM.drop_duplicates(subset=['fiducial '], keep='first', inplace=True)

In [39]:
gdf_AEM.columns[5:20]

Index(['fiducial ', 'easting ', 'northing ', 'elevation ', 'altimeter ',
       'nchains ', 'nsamples ', 'nburnin ', 'sampletime ', 'misfit_lowest ',
       'misfit_average ', 'ndepthcells ', 'geom', 'matfile', 'geometry'],
      dtype='object')

In [5]:
# bring in the LCI data

infile = r"C:\Users\PCUser\Desktop\AEM\LCI\HowardE_WB_MGA52.nc"

# Read in the data and convert to geodataframe

lci_dat = netCDF4.Dataset(infile)

In [6]:
# Now we bring in the magnetics to plot

inRaster = r"C:\Users\PCUser\Desktop\NSC_data\data\Magnetics\HE\HE_TMA_rtp_1VD.tif"

dataset = rasterio.open(inRaster)

mag = dataset.read(1)

mag[mag == dataset.get_nodatavals()] = np.nan


In [26]:
# Bring in a suset of the points to sample

infile = r"C:\Temp\HE_sel.csv"

df = pd.read_csv(infile)

# Get a mask with the fiducials

mask = gdf_AEM['fiducial '].isin(df['fiducial']) 

# Now subset the AEM

gdf_AEM_ss = gdf_AEM[mask]

gdf_AEM_ss['top_conductor'] = np.nan

print(len(gdf_AEM_ss))

461


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


In [31]:
# Create a generator to iterate through the pandas dataframe

def gen(df):
    for index, row in df.iterrows():
        yield index, row
        
cond_gen = gen(gdf_AEM_ss)

In [32]:

def onclick(event):
    if event.xdata != None and event.ydata != None:
        gdf_AEM_ss.at[ind, 'top_conductor'] = event.ydata



In [34]:
# Define some coordinates to investigate

ind, row = next(cond_gen)

near_fid = gdf_AEM_ss[gdf_AEM_ss['fiducial '] == row['fiducial ']]

D = extract_data(near_fid)
    
fig = HE_plot(D)

cid = fig.canvas.mpl_connect('button_press_event', onclick)

TypeError: expected str, bytes or os.PathLike object, not Series

In [35]:
near_fid

Unnamed: 0,uniqueid,survey,date,flight,line,fiducial,easting,northing,elevation,altimeter,...,changepoint [141],changepoint [142],changepoint [143],changepoint [144],changepoint [145],changepoint [146],changepoint [147],changepoint [148],changepoint [149],top_conductor
101,206,1303,20170725,20170725,114901,2084758.5,737184.8,8620812.0,8.9,0.0,...,307,313,337,445,495,437,663,928,12694,


In [66]:
cols = [x for x in gdf_AEM.columns if x.startswith('conductivity_p10')]

In [22]:
near_fid

Unnamed: 0,uniqueid,survey,date,flight,line,fiducial,easting,northing,elevation,altimeter,...,changepoint [140],changepoint [141],changepoint [142],changepoint [143],changepoint [144],changepoint [145],changepoint [146],changepoint [147],changepoint [148],changepoint [149]
97,201,1303,20170725,20170725,114901,2084756.0,737231.6,8620862.0,8.7,0.0,...,435,363,500,232,258,472,427,443,475,15595


In [72]:
D['easting']

739093.1