In [1]:
%load_ext autoreload
%autoreload 1
%aimport Utils
%aimport MatrixLinkGenerator
# import os
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"]="0"
import tensorflow as tf
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
np.set_printoptions(edgeitems=30, linewidth=100000, formatter=dict(float=lambda x: "%.3g" % x))
# np.set_printoptions(linewidth=np.inf)
from obspy import UTCDateTime as dt
import json
import matplotlib.pyplot as plt
from Utils import trainingResults, trainingResults2, predsMap
# plt.rcParams['figure.figsize'] = [50, 200]
plt.rcParams['figure.figsize'] = [16, 12]
# Run configuration shared by every cell below, parsed from an inline JSON document.
# "extents" holds one lat/lon bounding box per region and "location" names the one in use.
# NOTE(review): "oneHot" is the JSON *string* "False", not the boolean false, so any plain
# truthiness test on params['oneHot'] would evaluate to True. The inference cell below sets
# its own module-level `oneHot = False` instead of reading this key -- confirm how the
# generator code consumes it.
# NOTE(review): modelArch["dense2"] is not referenced by buildModel in this notebook.
params = json.loads('''{
    "extents": {
        "ak": {
            "latMin": 55.0,
            "latMax": 74.0,
            "lonMin": -163.0,
            "lonMax": -130.0
        },
        "s1": {
            "latMin": 22.0,
            "latMax": 40.0,
            "lonMin": 33.0,
            "lonMax": 62.0
        }
    },
    "location": "s1",
    "maxDepth": 50.0,
    "maxStationElevation": 1.0,
    "trainingGeneratorSourceFile": "./Training/Inputs/S1 00.gz",
    "trainingEventsFile": "./Training/Event Files/S1 Events 22-40-33-62 3+ Arrivals 120 TimeNorm.npz",
    "validationGeneratorSourceFile": "./Inputs/IDC Test Cleaned.gz",
    "validationEventsFile": "./Training/Event Files/IDC Test.npz",
    "arrivalProbsFile": "./Training/RSTT Model/S1 Dropouts.npy",
    "stationFile": "./Archive/Stations/S1 Station List.txt",
    "oneHot": "False",
    "arrivalProbMods": {
        "Pg": 5.0,
        "Pn": 3.0,
        "Sg": 5.0,
        "Sn": 25.0
    },
    "eventsPerExample": {
        "min": 1,
        "max": 4
    },
    "stationsPerBatch": {
        "min": 45,
        "max": 55
    },
    "timeShifts": {
        "min": -0.50,
        "max": 0.50
    },
    "batchSize": 1000,
    "samplesPerEpoch": 10000,
    "validationSamplesPerEpoch": 250000,
    "epochs": 2,
    "model": "./Models/Station List E722 L0.0046 AL0.0012 NL0.0023 LL0.0022 AP0.9606 AR0.9619 NP0.7507 NR0.8198 HL84.3.h5",
    "evalInFile": "./Inputs/S1 00.gz",
    "evalOutFile": "./Training/Evaluation.gz",
    "prlEvalOutFile": "./Training/PRL Evaluation.gz",
    "maxArrivals": 50,
    "minArrivals": 5,
    "maxNoise": 0.20,
    "clusterStrength": 0.90,
    "timeNormalize": 120,
    "associationWindow": 300,
    "evalWindow": 10.0,
    "phases": {
        "Pg": 0, "PcP": 0, "Pb": 0,
        "P": 1, "Pn": 1,
        "S": 2, "Sg": 2, "ScP": 2, "Lg": 2, "Sb": 2,
        "Sn": 3
    },
    "modelArch": {
        "dense": [32, 32, 64, 128, 128],
        "transformers": [256, 256],
        "heads": 4,
        "dense2": [128, 128, 128],
        "grus": [256, 256]
    }
}''')
# Last expression of the cell: shows which devices (CPU/GPU) TensorFlow can see.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [2]:
#MatrixLinkTrainer
import tensorflow as tf
import numpy as np
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.layers import Input, Embedding, Reshape, concatenate, Dense, Bidirectional, GRU, MultiHeadAttention, LayerNormalization
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, CSVLogger
import logging
import json
from MatrixLinkGenerator import generateEventFile, synthesizeEvents, synthesizeEventsFromEventFile
from Utils import nzBCE, nzMSE, nzPrecision, nzRecall, nzHaversine, nzTime, trainingResults

@tf.autograph.experimental.do_not_convert
def MatrixLink(params):
    """Build and compile the MatrixLink network, then try to restore saved weights.

    Args:
        params: configuration dict. Reads params['modelArch'] (the 'dense',
            'transformers', 'heads' and 'grus' entries), params['maxArrivals']
            (width of the association output) and params['model'] (weights path).

    Returns:
        The compiled Keras Model with outputs [association, location, noise, time].
        If the weights cannot be loaded the exception is printed and the model is
        returned with its freshly initialised weights.
    """
    logging.getLogger("tensorflow").setLevel(logging.ERROR)
    def buildModel(modelArch):
        """Assemble the network graph described by modelArch and compile it."""
        outputs = []
        inputs = []
        # Four numerical features per arrival; the sequence length is left variable.
        numericalInputs = Input(shape=(None,4), name='numerical_features')
        outputs.append(numericalInputs)
        inputs.append(numericalInputs)
        # The phase code is embedded into 2 dimensions and joined to the numerical features.
        categoricalInputs = Input(shape=(None,1), name='phase')
        embed = Embedding(5, 2, trainable=True, embeddings_initializer=RandomNormal())(categoricalInputs)
        embed = Reshape(target_shape=(-1,2))(embed)
        outputs.append(embed)
        inputs.append(categoricalInputs)
        outputs = concatenate(outputs)

        def TransformerBlock(inputs, embed_dim, ff_dim, num_heads=2, rate=0.1, eps=1e-6):
            """Self-attention + feed-forward block with residual connections.

            embed_dim must equal the last dimension of `inputs` so that both residual
            additions are shape compatible. `rate` is currently unused because the
            dropout layers are disabled.
            """
            attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)(inputs, inputs)
            out1 = LayerNormalization(epsilon=eps)(inputs + attn_output)
            ffn_output = Dense(ff_dim, activation="relu")(out1)
            ffn_output = Dense(embed_dim)(ffn_output)
            return LayerNormalization(epsilon=eps)(out1 + ffn_output)

        for denseUnits in modelArch['dense']:
            outputs = Dense(units=denseUnits, activation=tf.nn.relu)(outputs)
        # Read the branch widths explicitly instead of relying on loop variables that
        # happen to survive the for-loops.
        denseWidth = modelArch['dense'][-1]
        gruWidth = modelArch['grus'][-1]*2  # forward + backward halves of the Bidirectional GRU
        transformerOutputs = outputs
        gruOutputs = outputs

        # Two parallel branches over the dense features: transformers and bidirectional GRUs.
        for tUnits in modelArch['transformers']:
            transformerOutputs = TransformerBlock(transformerOutputs, denseWidth, tUnits, modelArch['heads'])
        for gUnits in modelArch['grus']:
            gruOutputs = Bidirectional(GRU(gUnits, return_sequences=True))(gruOutputs)

        # Merge both branches and refine with a second transformer stack.
        outputs = concatenate([transformerOutputs, gruOutputs], axis=2)
        for tUnits in modelArch['transformers']:
            outputs = TransformerBlock(outputs, denseWidth+gruWidth, tUnits, modelArch['heads'])

        association = Dense(units=params['maxArrivals'], activation=tf.nn.sigmoid, name='association')(outputs)
        location = Dense(units=2, name='location')(outputs)
        noise = Dense(units=1, activation=tf.nn.sigmoid, name='noise')(outputs)
        time = Dense(units=1, name='time')(outputs)

        model = Model(inputs=inputs, outputs=[association, location, noise, time])
        losses = { 'association': nzBCE, 'location': nzMSE, 'noise': nzBCE, 'time': nzMSE }
        weights = { 'association': 1.0, 'location': 1.0, 'noise': 0.25, 'time': 0.5 }
        metrics = { 'association': [nzPrecision, nzRecall],
                    'location': nzHaversine,
                    'noise': [nzPrecision, nzRecall],
                    'time': nzTime
                  }
        model.compile(optimizer=Adam(clipnorm=0.00001), loss=losses, loss_weights=weights, metrics=metrics)
        return model

    model = buildModel(params['modelArch'])
    try:
        model.load_weights(params['model'])
        print("Loaded previous weights.")
    except Exception as e:
        # Best effort: a missing or incompatible weights file means training starts from scratch.
        print(e)
        print("No previous weights loaded.")
    # summary() prints the table itself and returns None, so it must not be wrapped in print().
    model.summary()
    return model

class saveCb(Callback):
    """Keras callback that saves the model each time the epoch training loss improves.

    The checkpoint file name encodes the epoch number and the logged losses/metrics.
    """
    def on_train_begin(self, logs=None):
        # Only losses below this starting value are treated as an improvement.
        self.best = 100.
    def on_epoch_end(self, epoch, logs=None):
        if logs['loss'] < self.best:
            self.best = logs['loss']
            print('Saving best model with loss', self.best)
            modelName = 'E%03d L%.4f AL%.4f LL%.4f NL%.4f TL%.4f AP%.4f AR%.4f NP%.4f NR%.4f HL%.1f TL%.3f.h5' %\
                (epoch, logs['loss'], logs['association_loss'], logs['location_loss'], logs['noise_loss'], logs['time_loss'], logs['association_nzPrecision'], logs['association_nzRecall'], logs['noise_nzPrecision'], logs['noise_nzRecall'], logs['location_nzHaversine'], logs['time_nzTime'])
            # Save the model this callback is attached to (Keras sets self.model) rather
            # than whatever notebook-global happens to be named `model`.
            self.model.save("./Training/Models/"+modelName)

In [None]:
# Training cell: stream synthetic examples from the generator into model.fit, checkpoint
# on improvement, append per-epoch metrics to logs.csv and plot them afterwards.
# tf.config.threading.set_intra_op_parallelism_threads(2)
# tf.config.threading.set_inter_op_parallelism_threads(2)

# trainingEvents, trainingEventList = generateEventFile(params, trainingSet=True)
# validationEvents, validationEventList = generateEventFile(params)

# generator = synthesizeEventsFromEventFile(params, trainingEvents, trainingEventList, trainingSet=True)
generator = synthesizeEvents(params)
# vgen = synthesizeEventsFromEventFile(params, validationEvents, validationEventList)
# vgen = synthesizeEvents(params)

model = MatrixLink(params)
history = model.fit(generator,
#                  validation_data=vgen,
                 # Keras documents steps_per_epoch as an integer; true division yields a float.
                 steps_per_epoch=params['samplesPerEpoch']//params['batchSize'],
#                  validation_steps = params['validationSamplesPerEpoch']//params['batchSize'],
                 epochs=params['epochs'],
                 callbacks=[saveCb(), EarlyStopping(monitor='loss', patience=40), CSVLogger('./Training/Models/logs.csv', append = True)],
                 verbose=1)
trainingResults(np.genfromtxt('./Training/Models/logs.csv', delimiter=',', names=True))

In [3]:
#MatrixLink
import tensorflow as tf
import numpy as np
import pandas as pd
import json
from collections import deque
from math import ceil
from tensorflow.python.util import deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False
from tensorflow.keras.models import load_model
from obspy import UTCDateTime
from scipy.cluster.hierarchy import ward, fcluster
from scipy.spatial.distance import squareform
from Utils import nzBCE, nzMSE1, nzMSE2, nzHaversine, nzAccuracy, nzPrecision, nzRecall, nzTime, evaluate

# Build permutation lists and matrices to predict on
def permute(X):
    """Cut the arrival list into overlapping time windows and build the model input.

    Windows are params['associationWindow'] long and advance by one fifth of that
    length, the first one starting one step before time zero. Column 2 of X is used
    as the arrival time. A window is kept only when it holds at least
    params['minArrivals'] arrivals, and at most params['maxArrivals'] of them are used.

    Returns:
        X_perm: deque with, per kept window, the row indices of X that fall inside it.
        X_test: zero-padded array of shape (kept windows, maxArrivals, 5); times are
            made relative to the first arrival of the window and divided by
            params['timeNormalize'].
        innerWindows: deque with, per kept window, its start time plus one step.
    """
    windowLength = params['associationWindow']
    fewest = params['minArrivals']
    most = params['maxArrivals']
    step = windowLength/5
    times = X[:,2]
    total = ceil((times.max() + step*2) / step)

    X_perm = deque()
    innerWindows = deque()
    lower = -step
    for n in range(total):
        print('\rCreating permutations... ' + str(n) + ' / ' + str(total), end='')
        upper = lower+windowLength
        members = np.where((times >= lower) & (times < upper))[0]
        # Advance first: the value recorded for a kept window is its start plus one step.
        lower += step
        if len(members) < fewest:
            continue
        X_perm.append(members[:most])
        innerWindows.append(lower)

    X_test = np.zeros((len(X_perm),most,5))
    for row, members in enumerate(X_perm):
        filled = len(members)
        X_test[row,:filled] = X[members]
        # Re-reference times to the first arrival of the window.
        X_test[row,:filled,2] -= X_test[row,0,2]
    X_test[:,:,2] /= params['timeNormalize']
    return X_perm, X_test, innerWindows

def buildEvents(X, labels, X_perm, X_test, Y_pred, innerWindows):
    """Cluster every window's predicted association matrix and assemble the event catalogue.

    Reads the module-level params, extents, latRange and lonRange.

    Args:
        X: arrival feature array; column 2 is used as the arrival time.
        labels: DataFrame aligned row-for-row with X; needs ARID, EVID and TIME columns.
        X_perm: per window, the row indices of X it contains.
        X_test: padded model input; the last column flags real (non-padding) arrivals.
        Y_pred: model outputs indexed as [association, location, noise, time].
        innerWindows: per window, the start of its inner (containment) interval.

    Returns:
        DataFrame of the arrivals assigned to events, with EVID set per event plus
        ETIME, PLAT, PLON, LAT and LON columns. Events left with fewer than
        params['minArrivals'] arrivals are removed.
    """
    # Get clusters for predicted matrix at index i
    def cluster(i):
        valids = np.where(X_test[i][:,-1])[0]
        validPreds = Y_pred[0][i][valids,:len(valids)]
        # Symmetrise the predictions and turn them into a distance matrix.
        L = 1-((validPreds.T + validPreds)/2)
        np.fill_diagonal(L,0)
        return fcluster(ward(squareform(L)), params['clusterStrength'], criterion='distance')

    def addToCatalogue(catalogue, candidate):
        # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
        # An empty catalogue is skipped so its untyped columns cannot affect dtypes.
        if catalogue.empty:
            return candidate
        return pd.concat([catalogue, candidate])

    innerWindow = params['associationWindow'] * (3/5)
    minArrivals = params['minArrivals']
    catalogue = pd.DataFrame(columns=labels.columns)
    evid = 1
    created = 1
    for window in range(len(X_perm)):
        clusters = cluster(window)
        for c in np.unique(clusters):
            pseudoEventIdx = np.where(clusters == c)[0]
            if len(pseudoEventIdx) < minArrivals:
                continue
            pseudoEvent = X_perm[window][pseudoEventIdx]
            event = X[pseudoEvent]
            # check for containment within inner window
            contained = (event[0,2] >= innerWindows[window]) & (event[-1,2] <= (innerWindows[window]+innerWindow))
            if not contained:
                continue
            candidate = labels.iloc[pseudoEvent].copy()
            try:
                candidate['ETIME'] = candidate.TIME.min() + np.median(Y_pred[3][window][pseudoEventIdx][:]*params['timeNormalize'])
            except Exception:
                # Best effort: fall back to a sentinel when TIME cannot be combined with the offset.
                candidate['ETIME'] = -1
            # De-normalise the per-arrival location predictions; the event location is their median.
            candidate['PLAT'] = Y_pred[1][window][pseudoEventIdx][:,0]*latRange+extents[0]
            candidate['PLON'] = Y_pred[1][window][pseudoEventIdx][:,1]*lonRange+extents[2]
            candidate['LAT'] = np.median(candidate.PLAT)
            candidate['LON'] = np.median(candidate.PLON)
            # check for existence in catalogue
            overlap = candidate.ARID.isin(catalogue.ARID).sum()
            if overlap == 0:
                print("\rPromoting event " + str(created), end='')
                candidate['EVID'] = evid
                catalogue = addToCatalogue(catalogue, candidate)
                evid += 1
                created += 1
            elif len(pseudoEvent) > overlap:
                # The candidate brings new arrivals: move the shared arrivals over to it.
                catalogue = catalogue[~catalogue.ARID.isin(candidate.ARID)]
                candidate['EVID'] = evid
                catalogue = addToCatalogue(catalogue, candidate)
                evid += 1
    catalogue = catalogue.groupby('EVID').filter(lambda x: len(x) >= minArrivals)
    print()
    return catalogue

def matrixLink(X, labels, denoise=False, noiseThreshold=0.008, denoiseRounds=3):
    """Run the association pipeline: window the arrivals, predict, optionally denoise, build events.

    Reads the module-level `model` and `oneHot`.

    Args:
        X: arrival feature array as produced by processInput.
        labels: DataFrame aligned row-for-row with X.
        denoise: when True, repeatedly drop arrivals the model flags as noise and
            predict again on the survivors before clustering.
        noiseThreshold: an arrival is dropped when its noise output exceeds this value.
        denoiseRounds: number of drop-and-repredict rounds.

    Returns:
        The event catalogue returned by buildEvents.
    """
    def predict(X_test):
        # The one-hot model takes the full feature array; otherwise the phase column
        # is fed separately from the numerical features.
        if oneHot:
            return model.predict(X_test)
        return model.predict({"phase": X_test[:,:,3], "numerical_features": X_test[:,:,[0,1,2,4]]})

    print("Creating permutations... ", end='')
    X_perm, X_test, innerWindows = permute(X)
    print("\nMaking initial predictions... ", end='')
    Y_pred = predict(X_test)
    if denoise:
        print("\nEliminating noise and predicting again... ", end='')
        for _ in range(denoiseRounds):
            kept = []
            for i in range(len(X_perm)):
                noise = np.where(Y_pred[2][i] > noiseThreshold)[0]
                # Ignore predictions that fall on padding slots beyond the window's arrivals.
                kept.append(np.delete(X_perm[i], noise[noise < len(X_perm[i])]))
            # np.unique returns the surviving row indices sorted, which keeps X in time
            # order; iterating a set gives no such ordering guarantee.
            valids = np.unique(np.concatenate(kept))
            X = X[valids]
            labels = labels.iloc[valids]

            X_perm, X_test, innerWindows = permute(X)
            Y_pred = predict(X_test)
    print("clustering and building events...")
    catalogue = buildEvents(X, labels, X_perm, X_test, Y_pred, innerWindows)
    return catalogue

def processInput(oneHot = False):
    """Turn the module-level `inputs` arrivals table into a time-sorted feature array.

    Reads the globals inputs, phases, extents, latRange and lonRange. Station
    coordinates are scaled by the extents, times become seconds relative to the
    earliest arrival, and the last feature of every row is a constant 1.

    Args:
        oneHot: when True the phase is written as four indicator columns, otherwise
            as a single integer code.

    Returns:
        X: float array with one row per arrival, sorted by time.
        labels: DataFrame holding the original input rows in the same order as X.
    """
    print("Reading input file... ", end='')
    features = []
    rows = []
    for _, r in inputs.iterrows(): # I can do this better
        phase = phases[r.PHASE]
        arrivalTime = UTCDateTime(r.TIME)
        lat = abs((r.ST_LAT - extents[0]) / latRange)
        lon = abs((r.ST_LON - extents[2]) / lonRange)
        otime = arrivalTime - UTCDateTime(0)
        try:
            if not oneHot:
                arrival = [lat, lon, otime, phase, 1]
            else:
                arrival = [lat, lon, otime, 0, 0, 0, 0, 1]
                arrival[3+phase] = 1
            features.append(arrival)
            rows.append(r)
        except Exception as e:
            print(e)
    X = np.array(features)
    order = np.argsort(X[:,2])
    X = X[order,:]
    # Make every time relative to the earliest arrival.
    X[:,2] -= X[0,2]
    labels = pd.DataFrame([rows[j] for j in order])
    print("%d arrivals found" % len(labels))
    return X, labels

if __name__ == "__main__":
    # Inference driver. The names assigned here (phases, extents, latRange, lonRange,
    # model, oneHot, inputs) are module-level globals that processInput, matrixLink,
    # buildEvents and predsMap read directly, so this block must run before they are called.
    pd.options.display.float_format = "{:.2f}".format
#     with open("Parameters.json", "r") as f:
#         params = json.load(f)
    phases = params['phases']
    # extents = [latMin, latMax, lonMin, lonMax, maxDepth, maxStationElevation]; the first
    # four entries rely on the key order of the selected params['extents'] entry.
    extents = np.array(list(params['extents'][params['location']].values())+[params['maxDepth'],params['maxStationElevation']])
    latRange = abs(extents[1] - extents[0])
    lonRange = abs(extents[3] - extents[2])
    # The custom losses/metrics must be supplied by name to deserialise the saved model;
    # note that the name 'nzMSE' is mapped to nzMSE2 here.
    model = load_model(params['model'], custom_objects={'nzBCE':nzBCE, 'nzMSE':nzMSE2, 'nzMSE1':nzMSE1, 'nzMSE2':nzMSE2, 'nzHaversine':nzHaversine, 'nzPrecision':nzPrecision, 'nzRecall':nzRecall, 'nzAccuracy':nzAccuracy, 'nzTime':nzTime}, compile=True)

    inFiles = ['./Inputs/S1 00.gz']
#     inFiles = ['./Inputs/S1 50.gz', './Inputs/S1 25.gz', './Inputs/S1 15.gz', './Inputs/S1 00.gz']
    oneHot = False
    denoise = True
    evals = {file:[] for file in inFiles}
    for i in range(len(inFiles)):
        # NOTE(review): read_pickle executes arbitrary code from the file; only use trusted inputs.
        inputs = pd.read_pickle(inFiles[i]).sort_values(by=['TIME']).reset_index(drop=True)
        params['evalInFile'] = inFiles[i]
        # Evaluate only the rows between the 80% and 82.5% quantiles of TIME.
        start = inputs[inputs.TIME >= inputs.TIME.quantile(.8)].index[0]
        end = inputs[inputs.TIME >= inputs.TIME.quantile(.825)].index[0]
        inputs = inputs[start:end]

        X, labels = processInput(oneHot)
        outputs = matrixLink(X, labels, denoise)
        # The same output path is written on every iteration, so with several input
        # files only the last catalogue remains on disk.
        outputs.to_pickle(params['evalOutFile'])
        evals[inFiles[i]] = evaluate(params, inputs, outputs, verbose=False)

    print("Consolidated summary for:", params['model'])
    print('File\tAHM\t Location')
    for file in evals.keys():
        # file[-5:-3] is the two characters just before the 3-character extension (e.g. "00").
        print(file[-5:-3], "{:8.2f}".format(evals[file][0]), "{:8.2f}".format(evals[file][1]))

Reading input file... 5113 arrivals found
Creating permutations... 17181 / 17182
Making initial predictions... 

UnknownError:    Fail to find the dnn implementation.
	 [[{{node CudnnRNN}}]]
	 [[model_2/bidirectional_4/forward_gru_4/PartitionedCall]] [Op:__inference_predict_function_7084]

Function call stack:
predict_function -> predict_function -> predict_function


In [None]:
# Load a 30-row slice (rows 740-769) of the saved evaluation catalogue for plotting.
outputs = pd.read_pickle(params['evalOutFile'])[740:770]
# One row per EVID (rows with ORID == -1 excluded): columns EV_LAT/EV_LON and LAT/LON.
locations = outputs[outputs.ORID != -1].groupby('EVID').first().reset_index()[['EV_LAT', 'EV_LON', 'LAT', 'LON']].values
# Distinct station coordinates appearing in the slice.
stations = np.unique(outputs[['ST_LAT', 'ST_LON']].values, axis=0)
# The leading rows of every EVID group (GroupBy.head with its default count), pairing the
# event's LAT/LON with the coordinates of a station that recorded it.
receivingStations = outputs[outputs.ORID != -1].groupby('EVID').head().reset_index()[['LAT', 'LON', 'ST_LAT', 'ST_LON']].values

import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
import cartopy.feature as cfeature
from matplotlib.colors import Normalize
def predsMap(locations, stations, receivingStations=None, outfile=None):
    """Draw observed vs. predicted event locations and the stations on a map.

    The map covers the module-level `extents` box padded by one degree per side.
    NOTE(review): a `predsMap` is also imported from Utils at the top of the
    notebook; this definition shadows it once the cell runs.

    Args:
        locations: rows of [observed lat, observed lon, predicted lat, predicted lon].
            Observed points are drawn green, predictions red, joined by a green line.
        stations: rows of [lat, lon], drawn as black markers.
        receivingStations: optional rows of [predicted lat, predicted lon, station lat,
            station lon]; a red line is drawn from each prediction to the station.
        outfile: optional path; when given the figure is also saved there.
    """
    extend = 1
    lat_min = extents[0] - extend
    lat_max = extents[1] + extend
    lon_min = extents[2] - extend
    lon_max = extents[3] + extend

    # The previous version also computed a per-event haversine error and a colour map
    # here, but never used the result; that dead code (and its TensorFlow dependency)
    # has been removed.
    fig, ax = plt.subplots(figsize=(25,25), subplot_kw={'projection': ccrs.PlateCarree()})
    states_provinces = cfeature.NaturalEarthFeature(
        category='cultural',
        name='admin_1_states_provinces_lines',
        scale='50m',
        facecolor='none')
    ax.add_image(cimgt.Stamen('terrain-background'), 4)
    ax.add_feature(states_provinces, edgecolor='gray')
    ax.coastlines('110m')
    ax.add_feature(cfeature.LAND)
    ax.add_feature(cfeature.COASTLINE)
    ax.add_feature(cfeature.RIVERS)
    ax.add_feature(cfeature.LAKES)

    # Inputs are stored as [lat, lon] while plot() expects x=lon, y=lat.
    for station in stations:
        ax.plot(station[1], station[0], 'o', markersize=7, c='k')
    for location in locations:
        obs = location[0:2]
        pred = location[2:4]
        ax.plot(obs[1], obs[0], 'o', markersize=3, c='g')
        ax.plot(pred[1], pred[0], 'o', markersize=3, c='r', alpha=0.7)
        ax.plot([obs[1], pred[1]], [obs[0], pred[0]], color='green', alpha=0.5)
    if receivingStations is not None:
        for link in receivingStations:
            pred = link[0:2]
            stas = link[2:4]
            ax.plot([pred[1], stas[1]], [pred[0], stas[0]], color='red', alpha=0.5)
    ax.set_extent([lon_min, lon_max, lat_min, lat_max])
#     ax.legend(['Stations','Observed','Predicted'])

    fig.canvas.draw()
    fig.tight_layout(pad=0, w_pad=1, h_pad=0)
    if outfile is not None:
        fig.savefig(outfile)
    plt.show()

In [None]:
#Testing stuff
# extents = np.array(list(params['extents'][params['location']].values())+[params['maxDepth'],params['maxStationElevation']])
# latRange = abs(extents[1] - extents[0])
# lonRange = abs(extents[3] - extents[2])
# Scratch cell: pull one item from the synthetic-event generator and display it.
# NOTE(review): the bare `test` at the end dumps the whole item into the cell output;
# consider showing only shapes or a small slice.
synth = synthesizeEvents(params)
test = next(synth)
# total = params['batchSize']*params['maxArrivals']*params['maxArrivals']
# padding = np.sum(test[0]['numerical_features'][:,:,3]==0)*params['maxArrivals']
# positiveProp = np.sum(test[1]==1) / (total-padding)
# y = np.bincount(test[0]['phase'].flatten().astype(int))
# y[0] -= padding/params['maxArrivals']
# ii = np.nonzero(y)
# phaseProportions = np.vstack((ii,y[ii]/((total-padding)/params['maxArrivals']))).T
# print("Total:",total)
# print("Padding:",padding,padding/total)
# print("Ones:",np.sum(test[1]==1),positiveProp)
# print("Phases:\n",phaseProportions)
test
# model = load_model(params['model'], custom_objects={'nzBCE':nzBCE, 'nzMSE':nzMSE, 'nzHaversine':nzHaversine, 'nzPrecision':nzPrecision, 'nzRecall':nzRecall})
# preds = model.predict(test[0])
# preds[1][:,:,0] = preds[1][:,:,0]*latRange + extents[0]
# preds[1][:,:,1] = preds[1][:,:,1]*lonRange + extents[2]
# test[1]['location'][:,:,0] = test[1]['location'][:,:,0]*latRange + extents[0]
# test[1]['location'][:,:,1] = test[1]['location'][:,:,1]*lonRange + extents[2]

# def cluster(data, i):
#     valids = np.where(test[0]['numerical_features'][i][:,3])[0]
#     validPreds = data[i][valids,:len(valids)]
#     L = 1-((validPreds.T + validPreds)/2)
#     np.fill_diagonal(L,0)
#     return fcluster(ward(squareform(L)), params['clusterStrength'], criterion='distance')

# i=0
# valids=np.where(test[0]['numerical_features'][i][:,3])
# numValid=len(valids[0])
# print("Arrivals:")
# print(test[0]['numerical_features'][i][valids])
# print("Predicted clusters:")
# print(cluster(preds[0], i))
# # print("Actual matrix:")
# # print(test[1]['association'][i][:numValid,:numValid])
# print("Actual clusters:")
# print(cluster(test[1]['association'], i))
# print("Predicted locations/Noise:")
# print(np.hstack([preds[1][i][:numValid], preds[2][i][:numValid]]))
# print("Actual locations/Noise:")
# print(np.hstack([test[1]['location'][i][:numValid], np.expand_dims(test[1]['noise'][i][:numValid],axis=1)]))

# locations = []
# for ex in range(params['batchSize']):
#     realClusters = cluster(test[1]['association'], ex)
#     evids, counts = np.unique(realClusters, return_counts=True)
#     evids = evids[np.where(counts >= params['minArrivals'])]
#     for evid in range(len(evids)):
#         latlons = test[1]['location'][ex][np.where(realClusters == evids[evid])]
#         realLat, realLon = np.median(latlons[:,0]), np.median(latlons[:,1])
#         latlons = preds[1][ex][np.where(realClusters == evids[evid])]
#         predLat, predLon = np.median(latlons[:,0]), np.median(latlons[:,1])
#         locations.append([realLat, realLon, predLat, predLon])
# locations = np.array(locations)
# stations = test[0]['numerical_features'][:,:,[0,1]].reshape(-1,2)
# stations = np.unique(stations, axis=0)
# stations[:,0] = stations[:,0]*latRange + extents[0]
# stations[:,1] = stations[:,1]*lonRange + extents[2]

# print(test[1]['association'][i])
# print(np.round(preds[0][i]))
# print(test[1]['noise'][i])
# print(np.round(preds[2][i].T))
# from tensorflow.keras import backend as K

In [48]:
def haversine(lat1, lon1, lat2, lon2):
    """
    Great-circle distance between two points given in decimal degrees,
    evaluated with TensorFlow ops on a sphere of radius 6378.1.
    """
    degToRad = 0.017453292519943295
    lat1 = lat1*degToRad
    lon1 = lon1*degToRad
    lat2 = lat2*degToRad
    lon2 = lon2*degToRad
    # Haversine of the central angle between the two points.
    latTerm = tf.sin((lat2 - lat1)/2.0)**2
    lonTerm = tf.sin((lon2 - lon1)/2.0)**2
    a = latTerm + tf.cos(lat1) * tf.cos(lat2) * lonTerm
    centralAngle = 2 * tf.asin(tf.sqrt(a))
    return 6378.1 * centralAngle

# Spot check: two points on the same meridian, 0.101 degrees of latitude apart.
haversine(-00.1, 0, 0.001, 0)

<tf.Tensor: shape=(), dtype=float32, numpy=11.243204>