In [643]:
%load_ext autoreload
%autoreload 1
%aimport Utils
%aimport MatrixLinkGenerator
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"]="0,1"
import tensorflow as tf
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
np.set_printoptions(edgeitems=30, linewidth=100000, formatter=dict(float=lambda x: "%.3g" % x))
# np.set_printoptions(linewidth=np.inf)
from obspy import UTCDateTime as dt
import json
import matplotlib.pyplot as plt
from Utils import trainingResults, trainingResults2, predsMap
# plt.rcParams['figure.figsize'] = [50, 200]
plt.rcParams['figure.figsize'] = [16, 12]
params = json.loads('''{
    "extents": {
        "ak": {
            "latMin": 55.0,
            "latMax": 74.0,
            "lonMin": -163.0,
            "lonMax": -130.0
        },
        "s1": {
            "latMin": 22.0,
            "latMax": 40.0,
            "lonMin": 33.0,
            "lonMax": 62.0
        },
        "global": {
            "latMin": -90.0,
            "latMax": 90.0,
            "lonMin": -180.0,
            "lonMax": 180.0
        }
    },
    "location": "global",
    "maxDepth": 50.0,
    "maxStationElevation": 1.0,
    "trainingGeneratorSourceFile": "./Inputs/IDC 10-20.gz",
    "trainingEventsFile": "./Training/Event Files/IDC 10-20 ECEF.npz",
    "validationGeneratorSourceFile": "./Inputs/IDC 10-20.gz",
    "validationEventsFile": "./Training/Event Files/IDC 10-20 ECEF.npz",
    "arrivalProbsFile": "./Training/RSTT Model/S1 Dropouts.npy",
    "stationFile": "./Archive/Stations/S1 Station List.txt",
    "oneHot": "True",
    "arrivalProbMods": {
        "Pg": 5.0,
        "Pn": 3.0,
        "Sg": 5.0,
        "Sn": 25.0
    },
    "eventsPerExample": {
        "min": 6,
        "max": 20
    },
    "stationsPerBatch": {
        "min": 45,
        "max": 55
    },
    "timeShifts": {
        "min": -0.50,
        "max": 0.50
    },
    "batchSize": 100,
    "samplesPerEpoch": 1000000,
    "validationSamplesPerEpoch": 250000,
    "epochs": 1000,
    "model": "./Training/Models/IDC/E026 L0.0120 AL0.0069 LL1796.6814 TL0.0060 AA0.4488 AP0.7215 AR0.2058.h5",
    "evalInFile": "./Inputs/S1 00.gz",
    "evalOutFile": "./Training/Evaluation.gz",
    "prlEvalOutFile": "./Training/PRL Evaluation.gz",
    "maxArrivals": 250,
    "minArrivals": 5,
    "maxNoise": 0.20,
    "clusterStrength": 0.9,
    "timeNormalize": 3600,
    "associationWindow": 3600,
    "evalWindow": 10.0,
    "phases": {
        "P": 0,
        "LR": 1,
        "Pn": 2,
        "T": 3,
        "tx": 4,
        "N": 5,
        "Sx": 6,
        "Pg": 7,
        "Lg": 8,
        "Sn": 9,
        "S": 10
    },
    "modelArch": {
        "dense": [32, 32, 32, 64, 64],
        "transformers": [128, 128],
        "heads": 4,
        "dense2": [128, 128, 128],
        "grus": [128, 128]
    }
}''')
tf.config.list_physical_devices()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [593]:
import random
import rstt
from copy import deepcopy
from collections import deque
modelPath = "./Training/RSTT Model/pdu202009Du.geotess"

def generateEventFile(params, trainingSet = False):
    if trainingSet:
        eventsFile = params['trainingEventsFile']
        generatorFile = params['trainingGeneratorSourceFile']
    else:
        eventsFile = params['validationEventsFile']
        generatorFile = params['validationGeneratorSourceFile']
    try:
        events = np.load(eventsFile, allow_pickle=True)['events'].flatten()[0]
        print("Training events loaded.") if trainingSet else print("Validation events loaded.")
    except:
        print("Events not loaded. Building from scratch.")
        extents = np.array(list(params['extents'][params['location']].values())+[params['maxDepth'],params['maxStationElevation']])
        latRange = abs(extents[1] - extents[0])
        lonRange = abs(extents[3] - extents[2])
        timeNormalize = params['timeNormalize']
        phases = params['phases']
        events = {}
        inputArrivals = pd.read_pickle(generatorFile)
        groupedEvents = (inputArrivals.groupby('EVID').filter(lambda x: len(x) >= params['minArrivals'])).groupby('EVID')
        count = 0
        for eid, arrivals in groupedEvents:
            count += 1
            print("\rBuilding event list: " + str(count) + ' / ' + str(len(groupedEvents)), end='')
            eventArrivals = []
            first = dt(arrivals.TIME.min())
            evtime = -(first - dt(arrivals.EV_TIME.min())) / timeNormalize
            for i, arrival in arrivals.iterrows():
                sx,sy,sz = ll2ecef(arrival.ST_LAT, arrival.ST_LON)
                ex,ey,ez = ll2ecef(arrival.EV_LAT, arrival.EV_LON)
                thisArrival = [sx,                                        # normalized station x
                               sy,                                        # normalized station y
                               sz,                                        # normalized station z
                               ((dt(arrival.TIME)-first)/timeNormalize),  # normalized arrival time
                               phases[arrival.PHASE],                     # phase
                               1.,                                        # valid arrival flag
                               5.0 / timeNormalize,                       # arrival uncertainty
                               0.9,                                       # retention rate when dropping some arrivals
                               ex,                                        # normalized event x
                               ey,                                        # normalized event y
                               ez,                                        # normalized event z
                               evtime]                                    # normalized event time (relative to first arrival)
                eventArrivals.append(thisArrival)
            events[eid] = np.array(eventArrivals)
        np.savez_compressed(eventsFile, events=events)
        print()
    eventList = list(events.keys())
    return events, eventList

def buildAssociationMatrix(evids):
    L = np.zeros((len(evids), len(evids))) + 99
    sparse_evids = evids[evids>=0]
    l = np.ones((len(sparse_evids), len(sparse_evids))) * sparse_evids.reshape((-1, 1))
    L[:len(sparse_evids), :len(sparse_evids)] = (l == l.T) * 1
    return L

def synthesizeEventsFromEventFile(params, events, eventList, trainingSet = False):
    maxArrivals = params['maxArrivals']
    minTimeShift = params['timeShifts']['min']
    maxTimeShift = params['timeShifts']['max']
    minEvents = params['eventsPerExample']['min']
    maxEvents = params['eventsPerExample']['max']+1 # because using in np.random.randint
    dropFactor = 0.5
    batchSize = params['batchSize']

    while True:
        X = []
        Y = []
        example = 0
        while example < batchSize:
            #Setup - choose random events, with the first being the primary event
            numEvents = np.random.randint(minEvents, maxEvents)
            chosenEvents = random.sample(eventList, numEvents)
            timeShifts = np.random.uniform(minTimeShift, maxTimeShift, size=numEvents-1)
            for i in range(0, len(chosenEvents)):
                thisEvent = events[chosenEvents[i]]
                #Randomly drop some picks from the event
                if trainingSet:
                    drops = thisEvent[:,7]
                    drops = drops + dropFactor*(1-drops) if dropFactor > 0 else drops*(1+dropFactor)
                    drops = np.random.binomial(1,drops)
                    idx = np.where(drops==1)[0]
                    thisEvent = thisEvent[idx,:]
                if i == 0:
                    sequence = thisEvent
                    sequence[:,5] = i
                else:
                    #Add the picks from this event
                    currentLength = len(sequence)
                    sequence = np.append(sequence, thisEvent, axis=0)
                    #Shift the starting time of this event
                    sequence[currentLength:currentLength+len(thisEvent),[3,11]] += timeShifts[i-1]
                    sequence[currentLength:currentLength+len(thisEvent),5] = i

            #Add random arrival time errors, except for the first pick of the primary event
            if trainingSet:
                timeShifts = np.random.uniform(-sequence[1:,6]*0.5, sequence[1:,6]*0.5)
                sequence[1:,3] += timeShifts

            #Sort by arrival time, drop picks with negative arrival times
            idx = np.argsort(sequence[:,3])
            remove = len(np.where((sequence[:,3] < 0))[0])
            idx = idx[remove:]
            sequence = sequence[idx,:]
            if len(sequence) == 0: #We lost all the valid arrivals, so scrap this training example
                continue

            #Make labels array
            labels = buildAssociationMatrix(sequence[:,5])

            #Reset primary event times
            ones = np.where(labels[0]==1)
#             if len(ones[0]) == 0: #We lost all the valid arrivals, so scrap this training example
#                 continue
            sequence[ones,3] -= sequence[ones,3][0][0]
            idx = np.argsort(sequence[:,3])

            #Truncate picks over maximum allowed
            idx = idx[:maxArrivals]
            sequence = sequence[idx,:]
            labels = labels[idx,:maxArrivals]

            sequence[:,5] = 1.
            #Pad the end if not enough picks were selected
            padding = maxArrivals - len(sequence)
            if padding > 0:
                labels = np.pad(labels, (0, maxArrivals-len(labels)), constant_values=99.)
                sequence_ = np.full((maxArrivals, 12), 0.)
#                 sequence_[sequence.shape[0]:, [8,9,10]] = 99.
                sequence_[:sequence.shape[0], :] = sequence
                sequence = sequence_
            X.append(sequence)
            Y.append(labels)
            example += 1
            
        #Yield these training examples
        X = np.array(X)
        Y = {"association": np.array(Y), "location": X[:,:,[8,9,10]], "time": X[:,:,11]}
        X = {"phase": X[:,:,4], "numerical_features": X[:,:,[0,1,2,3,5]]}
        yield X, Y

In [594]:
maxArrivals = params['maxArrivals']
from tensorflow.keras import backend as K
from geographiclib.geodesic import Geodesic
from tensorflow.keras.losses import binary_crossentropy as BCE
from tensorflow.keras.metrics import binary_accuracy

K.set_floatx('float64')
maxArrivals = params['maxArrivals']
matrixSize = maxArrivals**2
extents = np.array(list(params['extents'][params['location']].values())+[params['maxDepth'],params['maxStationElevation']])
latRange = abs(extents[1] - extents[0])
lonRange = abs(extents[3] - extents[2])
timeNormalize = params['timeNormalize']
zero = np.float64(0)
one = np.float64(1)
r = np.float64(12756.2)
nn = np.float64(99)

import pymap3d as pm
xym = 6378137.0
xym2 = 2*xym
zm = 6356752.3142451802
zm2 = 2*zm
def ll2ecef(lat,lon):
    x,y,z = pm.geodetic2ecef(lat, lon, 0)
    x = (x+xym)/xym2
    y = (y+xym)/xym2
    z = (z+zm)/zm2
    return x,y,z
def ecef2ll(x,y,z):
    x = x*xym2 - xym
    y = y*xym2 - xym
    z = z*zm2 - zm
    x,y,z = pm.ecef2geodetic(x,y,z)
    return x,y

def nzHaversine(y_true, y_pred):
#     y_pred = y_pred * tf.cast(y_true != nn, tf.float64)
#     y_true = y_true * tf.cast(y_true != nn, tf.float64)
    observation = tf.stack([y_true[:,:,0]*latRange + extents[0], y_true[:,:,1]*lonRange + extents[2]],axis=2)*0.017453292519943295
    prediction = tf.stack([y_pred[:,:,0]*latRange + extents[0], y_pred[:,:,1]*lonRange + extents[2]],axis=2)*0.017453292519943295
    used = tf.reduce_sum(tf.cast(tf.greater(tf.reduce_sum(y_true, axis=2),0), dtype=tf.float64), axis=1)
    used = tf.where(tf.equal(used, zero), one, used)
    dlat_dlon = (observation - prediction) / 2
    a = tf.sin(dlat_dlon[:,:,0])**2 + tf.cos(observation[:,:,0]) * tf.cos(prediction[:,:,0]) * tf.sin(dlat_dlon[:,:,1])**2
    c = tf.asin(tf.sqrt(a))*r
    return tf.reduce_sum((tf.reduce_sum(c, axis=1))/used) / tf.dtypes.cast(tf.shape(observation)[0], dtype=tf.float64)

def nzTime(y_true, y_pred):
    y_pred = y_pred * tf.cast(y_true != nn, tf.float64)
    y_true = y_true * tf.cast(y_true != nn, tf.float64)
    used = maxArrivals - tf.reduce_sum(tf.cast(tf.equal(y_true, zero), dtype=tf.float64), axis=1)
    used = tf.where(tf.equal(used, zero), one, used)
    diffs = tf.math.abs(tf.squeeze(y_pred)-y_true)*timeNormalize
    diffs = (tf.squeeze(y_pred)-y_true)*timeNormalize
    diffs = tf.reduce_sum(tf.reduce_sum(diffs, axis=1)/used)
    return diffs/tf.dtypes.cast(tf.shape(y_true)[0], dtype= tf.float64)

def nzMSE(y_true, y_pred):
    y_pred = y_pred * tf.cast(y_true != nn, tf.float64)
    y_true = y_true * tf.cast(y_true != nn, tf.float64)
    used = maxArrivals - tf.reduce_sum(tf.cast(tf.equal(y_true,0), dtype=tf.float64), axis=1)
    used = tf.where(tf.equal(used, zero), one, used)
    return K.mean(tf.reduce_sum(K.square(tf.squeeze(y_pred)-y_true),axis=1)/used)

def nzBCE(y_true, y_pred):
    y_pred = y_pred * tf.cast(y_true != nn, tf.float64)
    y_true = y_true * tf.cast(y_true != nn, tf.float64)
    used = maxArrivals - tf.reduce_sum(tf.cast(tf.equal(y_true,0), dtype=tf.float64), axis=1)
    used = tf.where(tf.equal(used, zero), one, used)
    return K.mean(BCE(y_true, y_pred)/used)

def nzAccuracy(y_true, y_pred):
    used = matrixSize/(tf.reduce_sum(tf.cast(tf.greater(tf.reduce_sum(y_true, axis=1), zero), dtype=tf.float64), axis=1)**2)
    used = tf.where(tf.equal(used, zero), one, used)
    acc = tf.reduce_sum(tf.cast(y_true==tf.round(y_pred), dtype=tf.float64),axis=(1,2))/matrixSize
    return K.mean(acc*used - used + 1)

def nzRecall(y_true, y_pred):
    y_pred = y_pred * tf.cast(y_true != nn, tf.float64)
    y_true = y_true * tf.cast(y_true != nn, tf.float64)
#     y_true = K.ones_like(y_true)
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    all_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (all_positives + K.epsilon())

def nzPrecision(y_true, y_pred):
    y_pred = y_pred * tf.cast(y_true != nn, tf.float64)
    y_true = y_true * tf.cast(y_true != nn, tf.float64)
#     y_true = K.ones_like(y_true)
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives / (predicted_positives + K.epsilon())

In [601]:
#MatrixLinkTrainer
import tensorflow as tf
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.layers import Input, Embedding, Reshape, concatenate, Dense, Bidirectional, GRU, MultiHeadAttention, LayerNormalization, Lambda
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, CSVLogger
from tensorflow.keras.backend import clip
import logging
import json

@tf.autograph.experimental.do_not_convert
def MatrixLink(params):
    logging.getLogger("tensorflow").setLevel(logging.ERROR)
    def buildModel(modelArch):
        outputs = []
        inputs = []
        numericalInputs = Input(shape=(None,5), name='numerical_features')
        outputs.append(numericalInputs)
        inputs.append(numericalInputs)
        categoricalInputs = Input(shape=(None,1), name='phase')
        embed = Embedding(11, 4, trainable=True, embeddings_initializer=RandomNormal())(categoricalInputs)
        embed = Reshape(target_shape=(-1, 4))(embed)
        outputs.append(embed)
        inputs.append(categoricalInputs)
        outputs = concatenate(outputs)

        def TransformerBlock(inputs, embed_dim, ff_dim, num_heads=2, rate=0.1, eps=1e-6):
            attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)(inputs, inputs)
#             attn_output = Dropout(rate)(attn_output)
            out1 = LayerNormalization(epsilon=eps)(inputs + attn_output)
            ffn_output = Dense(ff_dim, activation="relu")(out1)
            ffn_output = Dense(embed_dim)(ffn_output)
#             ffn_output = Dropout(rate)(ffn_output)
            return LayerNormalization(epsilon=eps)(out1 + ffn_output) 

        for d1Units in modelArch['dense']:
            outputs = Dense(units=d1Units, activation=tf.nn.relu)(outputs)
        transformerOutputs = outputs
        gruOutputs = outputs

        for tUnits in modelArch['transformers']:
            transformerOutputs = TransformerBlock(transformerOutputs, d1Units, tUnits, modelArch['heads'])
        for gUnits in modelArch['grus']:
            gruOutputs = Bidirectional(GRU(gUnits, return_sequences=True))(gruOutputs)

        outputs = concatenate([transformerOutputs, gruOutputs], axis=2)
        for tUnits in modelArch['transformers']:
            outputs = TransformerBlock(outputs, d1Units+gUnits*2, tUnits, modelArch['heads'])

        association = Dense(units=params['maxArrivals'], activation=tf.nn.sigmoid, name='association')(outputs)
        location = Dense(units=3)(outputs)
        location = Lambda(lambda x: clip(x, 0, 1), name='location')(location)
        time = Dense(units=1, name='time')(outputs)
        
        model = Model(inputs=inputs, outputs=[association, location, time])
        losses = { 'association': nzBCE, 'location': nzMSE, 'time': nzMSE }
        weights = { 'association': 1.0, 'location': 1.0, 'time': 0.1 }
        metrics = { 'association': [nzAccuracy, nzPrecision, nzRecall] }
        model.compile(optimizer=Adam(clipnorm=0.00001), loss=losses, loss_weights=weights, metrics=metrics)
        return model

    model = buildModel(params['modelArch'])
    try:
        model.load_weights(params['model'])
        print("Loaded previous weights.")
    except Exception as e:
        print(e)
        print("No previous weights loaded.")
    print(model.summary())
    return model

class saveCb(Callback):
    def on_train_begin(self, logs=None):
        self.best = 100000000.
    def on_epoch_end(self, epoch, logs=None):
        if logs['loss'] < self.best:
            self.best = logs['loss']
            print('Saving best model with loss', self.best)
            modelName = 'E%03d L%.4f AL%.4f LL%.4f TL%.4f AA%.4f AP%.4f AR%.4f.h5' %\
                (epoch, logs['loss'], logs['association_loss'], logs['location_loss'], logs['time_loss'], logs['association_nzAccuracy'], logs['association_nzPrecision'], logs['association_nzRecall'])
            model.save("./Training/Models/IDC/"+modelName)

In [None]:
# tf.config.threading.set_intra_op_parallelism_threads(2)
# tf.config.threading.set_inter_op_parallelism_threads(2)

trainingEvents, trainingEventList = generateEventFile(params, trainingSet=True)
# validationEvents, validationEventList = generateEventFile(params)

generator = synthesizeEventsFromEventFile(params, trainingEvents, trainingEventList, trainingSet=True)
# generator = synthesizeEvents(params)
# vgen = synthesizeEventsFromEventFile(params, validationEvents, validationEventList)
# vgen = synthesizeEvents(params)

model = MatrixLink(params)
history = model.fit(generator,
#                  validation_data=vgen,
                 steps_per_epoch= params['samplesPerEpoch']/params['batchSize'],
#                  validation_steps = params['validationSamplesPerEpoch']/params['batchSize'],
                 epochs=params['epochs'],
                 callbacks=[saveCb(), EarlyStopping(monitor='loss', patience=50), CSVLogger('./Training/Models/IDC/logs2.csv', append = True)],
                 verbose=1)
# trainingResults(np.genfromtxt('./Training/Models/IDC/logs.csv', delimiter=',', names=True))

In [None]:
#MatrixLink
import tensorflow as tf
import numpy as np
import pandas as pd
import json
from collections import deque
from math import ceil
from tensorflow.python.util import deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False
from tensorflow.keras.models import load_model
from obspy import UTCDateTime
from scipy.cluster.hierarchy import ward, fcluster
from scipy.spatial.distance import squareform
from Utils import nzBCE, nzMSE1, nzMSE2, nzHaversine, nzAccuracy, nzPrecision, nzRecall, nzTime, evaluate

# Build permutation lists and matrices to predict on
def permute(X):
    outerWindow = params['associationWindow']
    minArrivals = params['minArrivals']
    maxArrivals = params['maxArrivals']
    edgeWindow = outerWindow/5
    numWindows = ceil((X[:,3].max() + edgeWindow*2) / edgeWindow)
    start = -edgeWindow

    innerWindows = deque()
    X_perm = deque()
    for window in range(numWindows):
        print('\rCreating permutations... ' + str(window) + ' / ' + str(numWindows), end='')
        end = start+outerWindow
        windowArrivals = np.where((X[:,3] >= start) & (X[:,3] < end))[0]
        start += edgeWindow
        if len(windowArrivals) >= minArrivals:
            X_perm.append(windowArrivals[:maxArrivals])
            innerWindows.append(start)
    X_test = np.zeros((len(X_perm),maxArrivals,6))
    for i in range(len(X_perm)):
        X_test[i,:len(X_perm[i])] = X[X_perm[i]]
        X_test[i,:len(X_perm[i]),3] -= X_test[i,0,3]
    X_test[:,:,3] /= params['timeNormalize']
    return X_perm, X_test, innerWindows

def buildEvents(X, labels, X_perm, X_test, Y_pred, innerWindows):
    # Get clusters for predicted matrix at index i
    def cluster(i):
        valids = np.where(X_test[i][:,-1])[0]
        validPreds = Y_pred[0][i][valids,:len(valids)]
        L = 1-((validPreds.T + validPreds)/2)
        np.fill_diagonal(L,0)
        return fcluster(ward(squareform(L)), params['clusterStrength'], criterion='distance')

    innerWindow = params['associationWindow'] * (3/5)
    minArrivals = params['minArrivals']
    catalogue = pd.DataFrame(columns=labels.columns)
#     events = deque()
    evid = 1
    created = 1
    for window in range(len(X_perm)):
        clusters = cluster(window)
        for c in np.unique(clusters):
            pseudoEventIdx = np.where(clusters == c)[0]
            pseudoEvent = X_perm[window][pseudoEventIdx]
            if len(pseudoEvent) >= minArrivals:
                event = X[pseudoEvent]
                # check for containment within inner window
                contained = (event[0,3] >= innerWindows[window]) & (event[-1,3] <= (innerWindows[window]+innerWindow))
                if contained:
                    candidate = labels.iloc[pseudoEvent].copy()
                    try:
                        candidate['ETIME'] = candidate.TIME.min() + np.median(Y_pred[2][window][pseudoEventIdx][:]*params['timeNormalize'])
                    except:
                        candidate['ETIME'] = -1
                    candidate['PLAT'] = Y_pred[1][window][pseudoEventIdx][:,0]*latRange+extents[0]
                    candidate['PLON'] = Y_pred[1][window][pseudoEventIdx][:,1]*lonRange+extents[2]
#                     candidate['LAT'] = np.median(Y_pred[1][window][pseudoEventIdx][:,0])*latRange+extents[0]
#                     candidate['LON'] = np.median(Y_pred[1][window][pseudoEventIdx][:,1])*lonRange+extents[2]
                    candidate['LAT'] = np.median(candidate.PLAT)
                    candidate['LON'] = np.median(candidate.PLON)
                    # check for existence in catalogue
                    overlap = candidate.ARID.isin(catalogue.ARID).sum()
                    if overlap == 0:
                        print("\rPromoting event " + str(created), end='')
#                         events.append(pseudoEvent)
                        candidate.EVID = evid
                        catalogue = catalogue.append(candidate)
                        evid += 1
                        created += 1
                    elif len(pseudoEvent) > overlap:
                        catalogue.drop(catalogue[catalogue.ARID.isin(candidate.ARID)].index, inplace=True)
                        candidate.EVID = evid
                        catalogue = catalogue.append(candidate)
                        evid += 1
    catalogue = catalogue.groupby('EVID').filter(lambda x: len(x) >= minArrivals)
    print()
    return catalogue

def matrixLink(X, labels, denoise=False):
    print("Creating permutations... ", end='')
    X_perm, X_test, innerWindows = permute(X)
    print("\nMaking initial predictions... ", end='')
    Y_pred = model.predict({"phase": X_test[:,:,4], "numerical_features": X_test[:,:,[0,1,2,3,5]]})
    if denoise:
        print("\nEliminating noise and predicting again... ", end='')
        for _ in range(3):
            valids = deque()
            for i in range(len(X_perm)):
                noise = np.where(Y_pred[2][i] > 0.008)[0]
                valids.append(np.delete(X_perm[i], noise[noise < len(X_perm[i])]))
            valids = np.array(list(set(np.concatenate(valids))))
            X = X[valids]
            labels = labels.iloc[valids]
            X_perm, X_test, innerWindows = permute(X)
            Y_pred = model.predict({"phase": X_test[:,:,4], "numerical_features": X_test[:,:,[0,1,2,3,5]]})
    print("clustering and building events...")
    catalogue = buildEvents(X, labels, X_perm, X_test, Y_pred, innerWindows)
    return catalogue

def processInput():
    print("Reading input file... ", end='')
    X = []
    labels = []
    for i, r in inputs.iterrows(): # I can do this better
        phase = phases[r.PHASE]
        time = UTCDateTime(r.TIME)
        lat = abs((r.ST_LAT - extents[0]) / latRange)
        lon = abs((r.ST_LON - extents[2]) / lonRange)
        x,y,z = ll2ecef(lat,lon)
        otime = time - UTCDateTime(0)
        try:
            arrival = [x, y, z, otime, phase, 1]
            X.append(arrival)
            labels.append(r)
        except Exception as e:
            print(e)
    X = np.array(X)
    idx = np.argsort(X[:,3])
    X = X[idx,:]
    X[:,3] -= X[0,3]
    labels = pd.DataFrame([labels[i] for i in idx])
    print("%d arrivals found" % len(labels))
    return X, labels

if __name__ == "__main__":
    pd.options.display.float_format = "{:.2f}".format
#     with open("Parameters.json", "r") as f:
#         params = json.load(f)
    phases = params['phases']
    extents = np.array(list(params['extents'][params['location']].values())+[params['maxDepth'],params['maxStationElevation']])
    latRange = abs(extents[1] - extents[0])
    lonRange = abs(extents[3] - extents[2])
    model = load_model(params['model'], custom_objects={'nzBCE':nzBCE, 'nzMSE':nzMSE2, 'nzMSE1':nzMSE1, 'nzMSE2':nzMSE2, 'nzHaversine':nzHaversine, 'nzPrecision':nzPrecision, 'nzRecall':nzRecall, 'nzAccuracy':nzAccuracy, 'nzTime':nzTime}, compile=True)

    inFiles = ['./Inputs/IDC Test.gz']
    inFiles = ['./Inputs/IDC 10-20.gz']
    denoise = False
    evals = {file:[] for file in inFiles}
    for i in range(len(inFiles)):
        inputs = pd.read_pickle(inFiles[i]).sort_values(by=['TIME']).reset_index(drop=True)
        params['evalInFile'] = inFiles[i]
        start = inputs[inputs.TIME >= inputs.TIME.quantile(.8)].index[0]
        end = inputs[inputs.TIME >= inputs.TIME.quantile(.802)].index[0]
        inputs = inputs[start:end]

        X, labels = processInput()
        outputs = matrixLink(X, labels, denoise)
        outputs.to_pickle(params['evalOutFile'])
        evals[inFiles[i]] = evaluate(params, inputs, outputs, verbose=False)

    print("Consolidated summary for:", params['model'])
    print('File\tAHM\t Location')
    for file in evals.keys():
        print(file[-5:-3], "{:8.2f}".format(evals[file][0]), "{:8.2f}".format(evals[file][1]))

Reading input file... 16451 arrivals found
Creating permutations... 742 / 743
Making initial predictions... clustering and building events...
Promoting event 1132
Matching event 1485 / 1485        
Evaluating event 893 / 1485                                                                               

In [646]:
# event = outputs[outputs.EVID == 40]
# dists = haversine(event.PLAT, event.PLON, event.LAT.iloc[0], event.LON.iloc[0])
# print(event.iloc[np.where(dists < (dists.mean()+dists.std()))[0]])

def cluster(i):
    valids = np.where(X_test[i][:,-1])[0]
    validPreds = Y_pred[0][i][valids,:len(valids)]
    L = 1-((validPreds.T + validPreds)/2)
    np.fill_diagonal(L,0)
    return fcluster(ward(squareform(L)), 0.9, criterion='distance')
associationWindow = 3600
i=6
evids = cluster(i)
evids = [evids[index] for index in sorted(np.unique(evids, return_index=True)[1])]
nevids = {evids[e]:e+1 for e in range(len(evids))}
evids = cluster(i)
predicted = np.array([nevids[evid] for evid in evids])

window = inputs[(inputs.TIME >= inputs.TIME.min() + innerWindows[i] - associationWindow*1/5) & (inputs.TIME < inputs.TIME.min() + innerWindows[i] + associationWindow*4/5)].EVID
evids = window.unique()
evids = [evids[index] for index in sorted(np.unique(evids, return_index=True)[1])]
nevids = {evids[e]:e+1 for e in range(len(evids))}
labels = np.array([nevids[evid] for evid in window.values])
print(labels)
print(predicted)
print(np.unique(labels, return_counts=True))
print(np.unique(predicted, return_counts=True))

[ 1  1  1  2  1  1  1  2  2  1  2  2  2  3  3  3  2  3  3  3  2  1  4  5  5  6  1  4  6  6  4  4  4  4  4  4  5  5  4  4  4  5  7  7  1  8  7  7  8  4  7  8  1  8  9  9  9  1  9  1 10 10 11 11 12 10 12 10 11 11 13 12 13 12  1 12 13 13 13  1 14 14 15  4 16 14 17 16 15 16 16 16 16 16 17 14 14 16 16 15 17 17]
[1 1 1 2 1 1 1 2 2 1 2 2 2 3 3 3 2 3 3 3 2 1 4 5 5 2 1 4 2 2 4 4 4 4 4 5 5 5 4 4 4 5 6 6 1 6 6 6 6 5 6 6 1 6 7 7 7 1 7 1 7 7 7 7 8 7 8 7 7 7 8 8 8 8 1 8 8 8 8 1 9 9 9 5 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9]
(array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17]), array([15,  8,  6, 13,  5,  3,  5,  4,  4,  4,  4,  5,  5,  5,  3,  9,  4]))
(array([1, 2, 3, 4, 5, 6, 7, 8, 9]), array([15, 11,  6, 10,  8,  9, 12, 10, 21]))


In [555]:
import pymap3d as pm
xym = 6378137.0
xym2 = 2*xym
zm = 6356752.3142451802
zm2 = 2*zm
def ll2ecef(lat,lon):
    x,y,z = pm.geodetic2ecef(lat, lon, 0)
    x = (x+xym)/xym2
    y = (y+xym)/xym2
    z = (z+zm)/zm2
    return x,y,z
def ecef2ll(x,y,z):
    x = x*xym2 - xym
    y = y*xym2 - xym
    z = z*zm2 - zm
    x,y,z = pm.ecef2geodetic(x,y,z)
    return x,y

lat, lon = 56.43, 58.56
x,y,z = ll2ecef(lat, lon)
print(lat, lon)
print(x,y,z)
print(ecef2ll(x,y,z))

56.43 -58.56
0.644548535338 0.263562816995 0.916176860751
(56.430000000000007, -58.560000000000024)


In [564]:
# from tensorflow.keras.models import load_model
# idc = pd.read_pickle("./Inputs/IDC 10-20.gz")
# idc = idc.drop_duplicates('ARID').sort_values(by='TIME')
# idc = idc[idc.IPHASE.isin(idc.IPHASE.value_counts()[0:11].keys())]
# window = 3600
# start = idc.TIME.min()
# counts = []
# for h in range(int((idc.TIME.max() - start) / window)+1):
#     counts.append(len(idc[(idc.TIME >= start) & (idc.TIME < start+window)]))
#     start += window
# counts = np.array(counts)
# counts.max()
# generator = synthesizeEventsFromEventFile(params, trainingEvents, trainingEventList, trainingSet=True)
# test = next(generator)
# total = params['batchSize']*params['maxArrivals']*params['maxArrivals']
# padding = np.sum(test[0]['numerical_features'][:,:,3]==0)*params['maxArrivals']
# positiveProp = np.sum(test[1]==1) / (total-padding)
# y = np.bincount(test[0]['phase'].flatten().astype(int))
# y[0] -= padding/params['maxArrivals']
# ii = np.nonzero(y)
# phaseProportions = np.vstack((ii,y[ii]/((total-padding)/params['maxArrivals']))).T
# print("Total:",total)
# print("Padding:",padding,padding/total)
# print("Ones:",np.sum(test[1]==1),positiveProp)
# print("Phases:\n",phaseProportions)

