In [None]:
# Cleaning IR data
# Filters are applied one after another, exactly in this order:
#   1. keep picks that have a station latitude,
#   2. keep picks with a positive TIME,
#   3. keep picks whose IPHASE starts with 'P' or 'S',
#   4. drop repeated (EVID, STA, IPHASE, TIME) rows and order by TIME, then ARID,
#   5. keep only events whose picks span less than 600 TIME units
#      (presumably seconds -- TODO confirm).
site1 = (
    pd.read_pickle("./Archive/site_1_cat.gz")
    .loc[lambda frame: frame.ST_LAT.notna()]
    .loc[lambda frame: frame.TIME > 0]
    .loc[lambda frame: frame.IPHASE.str[0].isin(['P','S'])]
    .drop_duplicates(subset=['EVID', 'STA', 'IPHASE', 'TIME'])
    .sort_values(by=['TIME', 'ARID'])
    .groupby('EVID')
    .filter(lambda eventPicks: (eventPicks.TIME.max() - eventPicks.TIME.min() < 600))
)
site1.to_pickle("site1cleaned.gz")

# Splitting IR data to generate/test
# NOTE(review): the split reads `inputPicks`, not the `site1` frame cleaned above,
# and `inputPicks` is not defined in this cell -- confirm it holds the cleaned picks.
# The positional row ranges below are hard-coded for one specific dataset.
picksByEvent = inputPicks.sort_values(by=['EVID', 'TIME'])
trainRows = np.r_[0:57013, 59513:171557, 174057:228712]
testRows = np.r_[57013:59513, 171557:174057]
picksByEvent.iloc[trainRows].sort_values(by=['TIME']).to_pickle("site1cleanedTRAIN.gz")
picksByEvent.iloc[testRows].sort_values(by=['TIME']).to_pickle("site1cleanedTEST.gz")

In [None]:
#training matrix to GPD output
# Builds two parallel structures from the training matrix X:
#   picks  -- float array, one row per pick:
#             [pick[0], pick[1], epoch time, pick[3], 1.0, 1.0]
#   labels -- "network station phase time" strings, one per pick
# Both are ordered by pick time at the end of the cell.
from obspy import UTCDateTime
import datetime
startTime = UTCDateTime(datetime.datetime.now()) #change later
stations = np.load(params["station_map_file"],allow_pickle=True)
# Invert the station map: value -> (key[0], key[1]), using only the 'P' entries.
reverseStationMap = {v: (k[0],k[1]) for k,v in stations.items() if k[2] == 'P'}
phase_idx = {0: 'P', 1: 'S'}
event_time = params["t_win"]
picks = []
labels = []
progress = 1
for event in X:
    for pick in event:
        # pick[4] == 0 ends the scan of this event (presumably padding rows -- TODO confirm).
        if pick[4] == 0:
            break
        network, station = reverseStationMap[tuple(pick[0:2])]
        # pick[2] is scaled by the window length and offset from the event's base time.
        pickTime = startTime + pick[2]*event_time
        pickTimeEpoch = pickTime - UTCDateTime(0)
        picks.append(np.append(pick[0:2], [pickTimeEpoch, pick[3], 1.0, 1.0]))
        labels.append("%s %s %s %s" % (network, station, phase_idx[pick[3]], pickTime))
    # Each event gets its own consecutive window.
    startTime += event_time #remove later
    print(progress, end='\r')
    progress += 1
# reshape keeps the array two-dimensional even when no pick was collected,
# so the column slice below cannot raise IndexError.
picks = np.array(picks).reshape(-1, 6)
# Stable sort keeps equal-time picks in their original order.
idx = np.argsort(picks[:,2], kind='stable')
picks = picks[idx,:]
# Apply the same permutation to labels so labels[i] still describes picks[i].
labels = [labels[i] for i in idx]

In [None]:
#training matrix to GPD input
# Formats the picks of the first 10 events of X as
# "network station phase time 1.0 1.0\n" lines.
from obspy import UTCDateTime
import datetime
startTime = UTCDateTime(datetime.datetime.now())
# NOTE(review): unlike the "GPD output" cell, startTime is not advanced per event,
# so every event is placed in the same window -- confirm this is intended.

# Reverse lookup built once instead of scanning the whole station map for every pick.
# setdefault keeps the FIRST key seen for each value, matching the original
# "[k for k, v in ... if v == ...][0]" selection. An unknown station now raises
# KeyError (previously IndexError).
stationLookup = {}
for stationKey, stationValue in stations.items():
    stationLookup.setdefault(stationValue, stationKey[0:2])

picks = []
for event in X[0:10]:
    for pick in event:
        # pick[4] == 0 ends the scan of this event (presumably padding rows -- TODO confirm).
        if pick[4] == 0:
            break
        network, station = stationLookup[tuple(pick[0:2])]
        phase = "P" if pick[3] == 0.0 else "S"
        pickTime = startTime + pick[2]*params["t_win"]
        picks.append("%s %s %s %s %s %s\n" % (network, station, phase, pickTime, 1.0, 1.0))

In [None]:
#training matrix to GPD labels
# Formats the picks of the first 10 events of X as "network station phase time" strings.
# Imported here as well so the cell does not depend on an earlier cell having run.
from obspy import UTCDateTime
import datetime
startTime = UTCDateTime(datetime.datetime.now())
# NOTE(review): startTime is not advanced per event, so every event is placed in
# the same window -- confirm this is intended.

# Reverse lookup built once instead of scanning the whole station map for every pick.
# setdefault keeps the FIRST key seen for each value, matching the original
# "[k for k, v in ... if v == ...][0]" selection. An unknown station now raises
# KeyError (previously IndexError).
stationLookup = {}
for stationKey, stationValue in stations.items():
    stationLookup.setdefault(stationValue, stationKey[0:2])

labels = []
for event in X[0:10]:
    for pick in event:
        # pick[4] == 0 ends the scan of this event (presumably padding rows -- TODO confirm).
        if pick[4] == 0:
            break
        network, station = stationLookup[tuple(pick[0:2])]
        labels.append("%s %s %s %s" % (network, station, "P" if pick[3] == 0.0 else "S", startTime + pick[2]*params["t_win"]))

In [None]:
def scaleDate(picks, scaleTo = 100):
    """Compress the day span of ``picks.TIME`` onto ``scaleTo`` days, in place.

    The whole-day offset of every pick from the first row is rescaled by
    ``scaleTo / fullDays``, where ``fullDays`` is the whole-day span between the
    first and last rows. The time of day is kept (times are interpreted in the
    local timezone via ``datetime.fromtimestamp``).

    Parameters
    ----------
    picks : DataFrame with an epoch-seconds ``TIME`` column. The first and last
        rows define the span, so the frame is expected to be time-ordered.
        The ``TIME`` column is overwritten with integer timestamps.
    scaleTo : number of days the original span is mapped onto.

    Returns
    -------
    None. ``picks`` is modified in place.
    """
    # Nothing to scale; iloc[0] would raise IndexError on an empty frame.
    if len(picks) == 0:
        return

    first = datetime.datetime.fromtimestamp(picks.TIME.iloc[0])
    fullDays = (datetime.datetime.fromtimestamp(picks.TIME.iloc[-1]) - first).days

    def doScale(pickTime):
        dayDiff = (pickTime - first).days
        # A span shorter than one day gives fullDays == 0; the ratio is undefined,
        # so every pick is mapped onto the first day instead of dividing by zero.
        scaledDay = int(dayDiff / fullDays * scaleTo) if fullDays else 0
        modDay = scaledDay - dayDiff
        return int((pickTime + datetime.timedelta(days=modDay)).timestamp())
    picks.TIME = picks.TIME.apply(lambda t: doScale(datetime.datetime.fromtimestamp(t)))

# Compressing dates
# scaleDate(site1, 1)
# dateGroups = site1.groupby(site1.TIME.apply(lambda x: datetime.datetime.fromtimestamp(x).date()))
# for date, picks in dateGroups:
#     print(date, picks.EVID.nunique())
# site1.to_pickle("site1TESTscaled.gz")

In [None]:
def fakeMaker(fakeArrivalFrac = 0.25, arrivals = None):
    """Mix randomly generated fake arrivals into a catalogue of real arrivals.

    Parameters
    ----------
    fakeArrivalFrac : fraction of the RETURNED frame that should be fake
        (``n_fake = n_real * frac / (1 - frac)``).
    arrivals : optional DataFrame of real arrivals. When omitted, the frame is
        read from ``params['evalInFile']`` as before.

    Returns
    -------
    DataFrame holding the real arrivals plus the fake ones, with the index reset
    and the rows then sorted by ``TIME``. Fake rows carry ``EVID == -1.0``,
    ``ORID``/``EV_TIME``/``EV_DEPTH`` of ``0.0`` and negative, decreasing ``ARID``.

    Notes
    -----
    * Station fields (STA, PHASE, ST_LAT, ST_LON, ST_ELEV) of each fake arrival
      are copied from a randomly drawn real arrival.
    * About 85% of the candidate times are real arrival times jittered by an
      integer in [-4, 3]; the rest are uniform integers in [minTime, maxTime).
    * Uses the global NumPy/pandas random state; seed it for reproducibility.
    """
    if arrivals is None:
        # NOTE(review): another cell reads params['eval_in_file'] -- confirm which key exists.
        arrivals = pd.read_pickle(params['evalInFile'])
    fakeArrivals = []
    minTime = arrivals.TIME.min()
    maxTime = arrivals.TIME.max()
    minEvLat = arrivals.EV_LAT.min()
    maxEvLat = arrivals.EV_LAT.max()
    minEvLon = arrivals.EV_LON.min()
    maxEvLon = arrivals.EV_LON.max()
    ARID = 0
    ORID = 0.0
    EV_TIME = 0.0
    EVID = -1.0
    EV_DEPTH = 0.0

    numFakeArrivals = int((len(arrivals)*fakeArrivalFrac)/(1-fakeArrivalFrac))
    numJittered = int(numFakeArrivals*.85)+1
    numUniform = int(numFakeArrivals*.15)+1
    # Build a new array rather than "+=" on the result of .values, which may be
    # a read-only view of the frame's data.
    times = arrivals.TIME.sample(numJittered).values + np.random.randint(-4, 4, size = numJittered)
    times = np.concatenate((times, np.random.randint(minTime, maxTime, size = numUniform)))
    MBs = np.round(np.random.uniform(0, 2, size = numFakeArrivals), 1)
    lats = np.round(np.random.uniform(minEvLat, maxEvLat, size = numFakeArrivals), 4)
    lons = np.round(np.random.uniform(minEvLon, maxEvLon, size = numFakeArrivals), 4)
    # Draw all donor rows in one call instead of one DataFrame.sample() per fake arrival.
    donors = arrivals.sample(n = numFakeArrivals, replace = True)
    for i, sample in enumerate(donors.itertuples(index = False)):
        print("\rGenerating fake arrival " + str(i+1) + " / " + str(numFakeArrivals), end='')
        ARID -= 1
        (fakeArrivals.append({
            "STA": sample.STA,
            "TIME": times[i],
            "ARID": ARID,
            "PHASE": sample.PHASE,
            "ST_LAT": sample.ST_LAT,
            "ST_LON": sample.ST_LON,
            "ST_ELEV": sample.ST_ELEV,
            "ORID": ORID,
            "EV_TIME": EV_TIME,
            "EVID": EVID,
            "EV_LAT": lats[i],
            "EV_LON": lons[i],
            "EV_DEPTH": EV_DEPTH,
            "MB": MBs[i],
        }))
    print()
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    combined = arrivals
    if fakeArrivals:
        combined = pd.concat([arrivals, pd.DataFrame(fakeArrivals)])
    return combined.reset_index(drop=True).sort_values(by=['TIME'])

In [None]:
# Plotting phases by distance
# Compares the source-station distance distributions of three pick sets:
# IR 'Sg' picks, RSTT 'Lg' picks, and RSTT 'Lg' picks selected by 50 random
# Bernoulli draws on their DROPOUT column.
from geopy.distance import geodesic

# Stored as rsttPicks (not rstt) so the frame does not shadow the `rstt` module
# that the travel-time cell calls (rstt.deg2rad, rstt.SlbmInterface).
rsttPicks = pd.read_pickle('./Training/IR RSTT.gz')
ir = pd.read_pickle('./Inputs/IR.gz')

rsttLg = rsttPicks[rsttPicks.PHASE == 'Lg']
rsttLgDists = [
    geodesic((pick.EVLAT, pick.EVLON), (pick.STALAT, pick.STALON)).km
    for i, pick in rsttLg.iterrows()
]

irSgDists = [
    geodesic((pick.EV_LAT, pick.EV_LON), (pick.ST_LAT, pick.ST_LON)).km
    for i, pick in ir[ir.IPHASE == 'Sg'].iterrows()
]

# The distances do not change between trials, so reuse the ones computed above
# and only redraw the selection mask.
rsttLgDistArray = np.array(rsttLgDists)
rsttLgDistsDropped = []
for k in range(0,50):
    print('\r' + str(k), end='')
    # One Bernoulli draw per Lg pick, with success probability DROPOUT.
    drops = np.random.binomial(1, rsttLg.DROPOUT)
    rsttLgDistsDropped.extend(rsttLgDistArray[drops == 1].tolist())
print(len(rsttLgDistsDropped))
plt.hist(irSgDists, bins=300, color='b', density=True)
plt.hist(rsttLgDists, bins=100, color='orange', density=True)
plt.hist(rsttLgDistsDropped, bins=300, color='r', density=True)
plt.show()

In [None]:
# One figure per ruling (TP / TN / FP / FN); each overlays the DISTANCE
# histograms of the four phases found in `events`.
plt.rcParams['figure.figsize'] = [16, 12]
alpha=0.8
bins=100
xrange=[0,2100]
phaseColors = {'Pg': 'tab:green', 'Pn': 'tab:blue', 'Sg': 'tab:orange', 'Sn': 'tab:red'}
rulingTitles = {'TP': 'True Positive', 'TN': 'True Negative', 'FP': 'False Positive', 'FN': 'False Negative'}
for ruling, title in rulingTitles.items():
    # Open the figure before drawing, so no empty figure is left behind after the last panel.
    plt.figure()
    for phase, color in phaseColors.items():
        selected = events[(events.RULING == ruling) & (events.PHASE == phase)]
        plt.hist(selected.DISTANCE, bins=bins, range=xrange, color=color, alpha=alpha, density=False)
    plt.title(title)
    plt.legend(list(phaseColors), fontsize=20)
    plt.xlabel('Distance')

In [None]:
# One figure per phase (Pg / Pn / Sg / Sn); each overlays the DISTANCE
# histograms of the four rulings found in `events`.
plt.rcParams['figure.figsize'] = [16, 12]
alpha=0.8
bins=100
xrange=[0,2100]
rulingColors = {'TP': 'tab:green', 'TN': 'tab:blue', 'FP': 'tab:red', 'FN': 'tab:orange'}
for phase in ['Pg', 'Pn', 'Sg', 'Sn']:
    # Open the figure before drawing, so no empty figure is left behind after the last panel.
    plt.figure()
    for ruling, color in rulingColors.items():
        selected = events[(events.RULING == ruling) & (events.PHASE == phase)]
        plt.hist(selected.DISTANCE, bins=bins, range=xrange, color=color, alpha=alpha, density=False)
    plt.title(phase)
    plt.legend(list(rulingColors), fontsize=20)
    plt.xlabel('Distance')

In [None]:
# precisions = {}
# recalls = {}
# precisions['Pg'] = len(events[(events.RULING == 'TP') & (events.PHASE == 'Pg')]) / ( len(events[(events.RULING == 'TP') & (events.PHASE == 'Pg')]) + len(events[(events.RULING == 'FP') & (events.PHASE == 'Pg')]) )
# precisions['Pn'] = len(events[(events.RULING == 'TP') & (events.PHASE == 'Pn')]) / ( len(events[(events.RULING == 'TP') & (events.PHASE == 'Pn')]) + len(events[(events.RULING == 'FP') & (events.PHASE == 'Pn')]) )
# precisions['Sg'] = len(events[(events.RULING == 'TP') & (events.PHASE == 'Sg')]) / ( len(events[(events.RULING == 'TP') & (events.PHASE == 'Sg')]) + len(events[(events.RULING == 'FP') & (events.PHASE == 'Sg')]) )
# precisions['Sn'] = len(events[(events.RULING == 'TP') & (events.PHASE == 'Sn')]) / ( len(events[(events.RULING == 'TP') & (events.PHASE == 'Sn')]) + len(events[(events.RULING == 'FP') & (events.PHASE == 'Sn')]) )
# recalls['Pg'] = len(events[(events.RULING == 'TP') & (events.PHASE == 'Pg')]) / ( len(events[(events.RULING == 'TP') & (events.PHASE == 'Pg')]) + len(events[(events.RULING == 'FN') & (events.PHASE == 'Pg')]) )
# recalls['Pn'] = len(events[(events.RULING == 'TP') & (events.PHASE == 'Pn')]) / ( len(events[(events.RULING == 'TP') & (events.PHASE == 'Pn')]) + len(events[(events.RULING == 'FN') & (events.PHASE == 'Pn')]) )
# recalls['Sg'] = len(events[(events.RULING == 'TP') & (events.PHASE == 'Sg')]) / ( len(events[(events.RULING == 'TP') & (events.PHASE == 'Sg')]) + len(events[(events.RULING == 'FN') & (events.PHASE == 'Sg')]) )
# recalls['Sn'] = len(events[(events.RULING == 'TP') & (events.PHASE == 'Sn')]) / ( len(events[(events.RULING == 'TP') & (events.PHASE == 'Sn')]) + len(events[(events.RULING == 'FN') & (events.PHASE == 'Sn')]) )
# NOTE(review): `precisions` and `recalls` are not assigned in this cell (the
# computation above is commented out), so they must already exist in the kernel.
print(precisions)
print(recalls)
# One bar chart per metric, keyed by the dict's own keys; a fresh figure is
# opened after each chart, as before.
for metric in (precisions, recalls):
    positions = range(len(metric))
    plt.bar(positions, metric.values(), align='center')
    plt.xticks(positions, list(metric.keys()))
    plt.figure()

In [None]:
# Mapping evaluated picks
# Draws one horizontal band per event (band e spans y in [e, e+1]). Every pick is
# a vertical segment at its time relative to the event's first input pick,
# coloured by its ruling. Picks sharing the same time split the band evenly.
# `display` and `HTML` are imported from IPython.display; the IPython.core.display
# path for `display` is deprecated.
from IPython.display import display, HTML

plt.rcParams['figure.figsize'] = [50, 600]
inputPicks = pd.read_pickle(params['eval_in_file'])
events = pd.read_pickle(params['pr_eval_out_file'])
display(HTML("<style>.container { width:80% !important; }</style>"))
colorMap = {'TP': 'tab:green', 'TN': 'tab:blue', 'FP': 'tab:red', 'FN': 'tab:orange'}
colorMapPhase = {'Pg': 'tab:green', 'Pn': 'tab:blue', 'P': 'tab:orange', 'Sg': 'tab:red', 'S': 'tab:red', 'Sn': 'tab:red'}
for e, (evid, picks) in enumerate(events.groupby('EVID'), start=1):
    # The y-extents below are generated in time order, so put the picks (and hence
    # x and colours) in the same order; the stable sort keeps ties in row order.
    picks = picks.sort_values('TIME', kind='mergesort')
    # Reference time: earliest input pick of this event, or the earliest evaluated
    # pick when the event has no input picks (min() of nothing is NaN).
    start = inputPicks[inputPicks.EVID == evid].TIME.min()
    if pd.isna(start):
        start = picks.TIME.min()
    # Explicit new array instead of mutating the array behind picks.TIME in place.
    relTimes = picks.TIME.values - start
    colors = [colorMap[k] for k in picks.RULING.values]
#     colors = [colorMapPhase[k] for k in picks.PHASE.values]
    # For a time shared by c picks, the k-th pick gets [e + k/c, e + (k+1)/c].
    _, counts = np.unique(picks.TIME.values, return_counts=True)
    ys1 = np.concatenate([e + np.arange(c) / c for c in counts])
    ys2 = np.concatenate([e + np.arange(1, c + 1) / c for c in counts])
    plt.vlines(x=relTimes, ymin=ys1, ymax=ys2, colors=colors)

In [None]:
# Estimating event times from catalogues
def get_TT(srcLatDeg, srcLonDeg, srcDepKm, rcvLatDeg, rcvLonDeg, rcvDepKm, phase, slbm):
    """Return the travel time reported by ``slbm`` for one source-receiver pair.

    The four latitude/longitude arguments are converted with ``rstt.deg2rad``;
    the two depth arguments and ``phase`` are passed to ``slbm`` unchanged.
    A great circle is created on ``slbm`` and the value of its
    ``getTravelTime()`` is returned (presumably seconds -- TODO confirm).
    """
    srcLatRad = rstt.deg2rad(srcLatDeg)
    srcLonRad = rstt.deg2rad(srcLonDeg)
    rcvLatRad = rstt.deg2rad(rcvLatDeg)
    rcvLonRad = rstt.deg2rad(rcvLonDeg)

    # The great circle must exist on the interface before the travel time is queried.
    slbm.createGreatCircle(phase, srcLatRad, srcLonRad, srcDepKm, rcvLatRad, rcvLonRad, rcvDepKm)
    return slbm.getTravelTime()

# instantiate an RSTT object
slbm = rstt.SlbmInterface()
# load the velocity model
slbm.loadVelocityModel(modelPath)

# For every event, compute the travel time of its earliest pick and store the
# NEGATED value in EV_TIME for all picks of that event. Events whose travel time
# cannot be computed get the sentinel 0 and are left out of the saved file.
event_times = []
failedEvids = []
events = pd.read_pickle(params['training_generator_source_file']).sort_values(by=['EVID','TIME'])
for evid, picks in events.groupby('EVID'):
    # Rows are sorted by (EVID, TIME), so the first row is the earliest pick.
    pick = picks.iloc[0]
    phase = pick.PHASE
    # 'Sg' and 'Pb' are looked up under the phase names 'Lg' and 'Pg'.
    if phase == 'Sg':
        phase = 'Lg'
    if phase == 'Pb':
        phase = 'Pg'
    # Read the columns outside the try block: a missing column is a schema problem
    # and should fail loudly rather than silently produce an empty output file.
    # NOTE(review): station columns are STALAT/STALON/STAELEV here but
    # ST_LAT/ST_LON/ST_ELEV in other cells -- confirm the schema of this file.
    # The receiver depth is passed as the negated station elevation.
    source = (pick.EV_LAT, pick.EV_LON, pick.EV_DEPTH)
    receiver = (pick.STALAT, pick.STALON, -pick.STAELEV)
    try:
        tt = get_TT(source[0], source[1], source[2], receiver[0], receiver[1], receiver[2], phase, slbm)*-1
    except Exception:
        # Best effort: keep going, but remember which events failed.
        failedEvids.append(evid)
        tt = 0
    event_times.extend([tt]*len(picks))
if failedEvids:
    print("Travel time could not be computed for %d event(s); they are excluded from the output" % len(failedEvids))
events['EV_TIME'] = event_times
events[events.EV_TIME != 0].to_pickle('./S1+EV_TIME.gz')