## Notebook to compute ML picks for INGV events in Norcia Catalog

In [1]:
import time
import os
import obspy
from obspy import UTCDateTime
from obspy.core.event import  Event, Origin, Magnitude, Pick, WaveformStreamID
from obspy import Catalog
import pandas as pd
import pylab as plt
import numpy as np
from obspy import read_inventory, read_events
from obspy.clients.fdsn import Client
from obspy.clients.filesystem.sds import Client as sdsclient
from obspy.clients.fdsn import RoutingClient
from obspy.core import Trace, Stream, Stats
import glob

### Defs

In [2]:
def download(starttime, endtime, inventory):
    """Read waveforms for every station of ``inventory`` from the SDS archives.

    Network ``"YR"`` is read through the module-level ``sdsYR`` client, every
    other network through ``sds``. The channel pattern requested for a station
    is the first two characters of the first channel code listed for that
    station code in ``inventory``, followed by ``"?"``.

    Parameters
    ----------
    starttime, endtime
        Start and end of the requested window, passed unchanged to
        ``get_waveforms``.
    inventory
        Iterable of networks, each iterable over stations exposing ``code``;
        it must also provide ``select(station=...)`` and ``get_contents()``.

    Returns
    -------
    obspy.Stream
        All traces found, merged with ``method=0`` and ``fill_value=0``.
        Empty when nothing could be read.

    Notes
    -----
    A failing request is retried up to 10 times with a 5 s pause between
    attempts, unless the error text starts with
    ``"No data available for request."``, in which case the station is
    skipped immediately. Errors are printed, never raised.
    """
    max_retry = 10
    no_data_message = "No data available for request."
    stream = obspy.Stream()

    for network in inventory:
        for station in network:
            for _attempt in range(max_retry):
                try:
                    first_channel_id = inventory.select(
                        station=station.code
                    ).get_contents()['channels'][0]
                    # "NET.STA.LOC.CHA" -> band + instrument code + wildcard.
                    ch = first_channel_id.split('.')[-1][:2] + "?"
                    # Resolved inside the try so that a missing client is
                    # reported like any other error.
                    archive = sdsYR if network.code == "YR" else sds
                    tmp = archive.get_waveforms(
                        network=network.code,
                        station=station.code,
                        location="",
                        channel=ch,
                        starttime=starttime,
                        endtime=endtime,
                    )
                    if len(tmp) > 0:
                        tmp.merge(method=0, fill_value=0)
                    for trace in tmp:
                        if trace.stats.sampling_rate != 100:
                            # The result is not rebound: the original code
                            # also relied on the trace being changed in place.
                            trace.interpolate(100, method="linear")
                    stream += tmp
                    break
                except Exception as err:
                    print("Error {}.{}: {}".format(network.code, station.code, err))
                    if str(err).startswith(no_data_message):
                        # Nothing to retry: the archive has no data here.
                        break
                    time.sleep(5)
            else:
                # Only reached when every attempt failed. The original message
                # formatted an undefined name (``fname``) and raised NameError.
                print(
                    f"{starttime} - {endtime}: MAX {max_retry} retries reached : "
                    f"{network.code}.{station.code}"
                )

    stream.merge(method=0, fill_value=0)
    return stream

In [3]:

def create_zero_stream(network, station, channels, start_time, end_time, sampling_rate):
    """Build a Stream holding one all-zero trace per entry of ``channels``.

    Every trace carries the given ``network``/``station`` codes, starts at
    ``start_time`` and contains ``int((end_time - start_time) * sampling_rate)``
    samples equal to 0.0.
    """

    def _blank_trace(channel_code):
        # Header first, then a zero-filled data array of matching length.
        header = Stats()
        header.network = network
        header.station = station
        header.channel = channel_code
        header.sampling_rate = sampling_rate
        header.starttime = UTCDateTime(start_time)

        n_samples = int((end_time - start_time) * sampling_rate)
        # max(..., 0) keeps the "empty array for a non-positive length"
        # behaviour of list multiplication.
        return Trace(data=np.zeros(max(n_samples, 0)), header=header)

    zero_stream = Stream()
    for channel_code in channels:
        zero_stream.append(_blank_trace(channel_code))
    return zero_stream

In [4]:

def _closest_pick(picks, phase, reference):
    """Return the peak time of the ``phase`` pick closest to ``reference``, or ''."""
    best_time = ''
    best_delta = 1e30
    for candidate in picks:
        if candidate.phase != phase:
            continue
        peak = obspy.UTCDateTime(candidate.peak_time)
        delta = abs(reference - peak)
        if delta < best_delta:
            best_time, best_delta = peak, delta
    return best_time


def findpicks(pddataframe, picker, threshold, inventory):
    """Run ``picker`` around each catalogue pick and keep the closest ML picks.

    For every row of ``pddataframe`` a window from 20 before to 40 after the
    catalogue P pick (or the S pick when P is missing) is read with
    ``download``. If no waveform is found, a zero-filled stream is built from
    the channels listed in the inventory for that station. The picker is then
    run on the stream and, for each phase present in the catalogue, the ML
    pick whose peak time is nearest to the catalogue pick is kept.

    Parameters
    ----------
    pddataframe : pandas.DataFrame
        Must provide the columns ``'Station'``, ``'P pick'`` and ``'S pick'``.
        Missing picks are NaN.
    picker
        Object exposing ``classify(stream, ...)`` whose result has ``.picks``;
        each pick exposes ``phase`` and ``peak_time``.
    threshold
        Passed as both ``P_threshold`` and ``S_threshold`` to ``classify``.
    inventory
        Station metadata exposing ``select(station=..., starttime=..., endtime=...)``.

    Returns
    -------
    pandas.DataFrame
        The same object as ``pddataframe`` (modified in place) with three
        added columns: ``'id'`` (id of the first trace used), ``'P pn'`` and
        ``'S pn'`` (closest ML pick time, or ``''`` when none).
    """
    name = "pn"
    Plist = []
    Slist = []
    mseedlist = []

    for _, row in pddataframe.iterrows():
        sta = row['Station']
        Pori = row['P pick']
        Sori = row['S pick']

        # The window is anchored on P when available, otherwise on S.
        anchor = Pori if pd.notna(Pori) else Sori
        t0 = anchor - 20
        t1 = anchor + 40

        inv = inventory.select(station=sta, starttime=t0, endtime=t1)
        for net in inv:
            if net.code == "8P":  # Mismatch between mseed and inventory
                net.code = "IV"

        mseed = download(t0, t1, inv)
        if len(mseed) == 0:
            # No waveform available: fall back to zero traces so the row
            # still goes through the picker. As in the original code, the
            # last network/station of the selection supplies the codes.
            network = None
            station = None
            channels = []
            for inv_net in inv:
                network = inv_net.code
                for inv_sta in inv_net:
                    station = inv_sta.code
                    channels.extend(cha.code for cha in inv_sta)
            if channels:
                mseed = create_zero_stream(network, station, channels, t0, t1, 100.0)

        if len(mseed) == 0:
            # Neither data nor channel metadata: previously this ended in a
            # NameError/IndexError. Record an empty result for the row instead.
            print(f"No waveform data or channel metadata for station {sta} between {t0} and {t1}")
            mseedlist.append('')
            Plist.append('')
            Slist.append('')
            continue

        mseedlist.append(mseed[0].get_id())
        for trace in mseed:
            if trace.stats.sampling_rate != 100:
                trace.resample(100.)

        picks = picker.classify(
            mseed,
            overlap=2800,
            stacking='max',
            P_threshold=threshold,
            S_threshold=threshold,
        ).picks

        # A phase absent from the catalogue never gets an ML pick.
        Plist.append(_closest_pick(picks, 'P', Pori) if pd.notna(Pori) else '')
        Slist.append(_closest_pick(picks, 'S', Sori) if pd.notna(Sori) else '')

    pddataframe['id'] = mseedlist
    pddataframe["P " + name] = Plist
    pddataframe["S " + name] = Slist
    return pddataframe


In [5]:
def write_event(df_local, origintime, eve_id, savelocaldirectory,
                base_dir="/home/jovyan/shared/users/spina/Norcia/github/OBS/",
                timedelta=2):
    """Write the accepted ML picks of one event as an NLLOC_OBS phase file.

    An ML pick (``'P pn'`` / ``'S pn'``) is accepted when the catalogue pick
    of the same phase exists and the two differ by at most ``timedelta``.
    P picks are written with channel code ``"Z"`` and ``time_errors=0.02``,
    S picks with channel code ``"N"`` and ``time_errors=0.04``. The network
    code is the first dot-separated field of the row's ``'id'``.

    Parameters
    ----------
    df_local : pandas.DataFrame
        Rows with ``'Station'``, ``'id'``, ``'P pick'``, ``'S pick'``,
        ``'P pn'`` and ``'S pn'``.
    origintime
        Only used, via ``str()``, to build the output file name.
    eve_id
        Stored as the event ``resource_id`` and written on the first line of
        the file as ``PUBLIC_ID <id>``.
    savelocaldirectory : str
        Sub-directory of ``base_dir`` receiving the file. Created if missing.
    base_dir : str, optional
        Root output directory. It is concatenated as-is, so it must end with
        a path separator. Defaults to the path that used to be hard-coded.
    timedelta : float, optional
        Maximum accepted absolute difference between catalogue and ML pick.

    Notes
    -----
    When no pick is accepted nothing is written to ``base_dir``; the event id
    is printed and appended to ``no_event.txt`` in the current directory.
    """
    obsdir = base_dir + savelocaldirectory + "/"  # Directory of the files for NLLoc
    os.makedirs(obsdir, exist_ok=True)

    e = Event()
    e.event_type = "Earthquake"
    e.resource_id = eve_id

    # (phase, channel code written to the pick, time error assigned to it)
    phase_settings = (('P', "Z", 0.02), ('S', "N", 0.04))

    for _, row in df_local.iterrows():
        for phase, channel_code, time_error in phase_settings:
            catalogue_time = row[phase + ' pick']
            ml_time = row[phase + ' pn']
            if pd.notna(catalogue_time) and (ml_time != ''):
                if abs(catalogue_time - ml_time) <= timedelta:
                    wav_id = WaveformStreamID(
                        station_code=row['Station'],
                        channel_code=channel_code,
                        network_code=row['id'].split('.')[0],
                    )
                    e.picks.append(Pick(
                        time=ml_time,
                        waveform_id=wav_id,
                        phase_hint=phase,
                        evaluation_mode="automatic",
                        time_errors=time_error,
                    ))

    if len(e.picks) > 0:
        fileOBS = obsdir + "Norcia_test_" + str(origintime) + "_" + ".phs"
        e.write(fileOBS, format="NLLOC_OBS")

        # Prepend the public id so the phase file can be traced to its event.
        with open(fileOBS, "r") as f:
            phase_lines = f.read()
        with open(fileOBS, "w") as f:
            f.write("PUBLIC_ID " + str(e.resource_id) + "\n" + phase_lines)
    else:
        print('No picks for event', e.resource_id)
        with open('no_event.txt', 'a') as f:
            f.write(str(e.resource_id) + "\n")


In [6]:

# FDSN web-service client for the "INGV" data centre.
client = Client("INGV")
# Local SDS archives read by download(): `sds` for every network except "YR",
# `sdsYR` for network "YR".
sds=sdsclient("/home/jovyan/data/sds/")
sdsYR=sdsclient("/home/jovyan/data/iris/")



In [11]:
# Time window covered by this run.
starttime=UTCDateTime("2016-10-20T00:00:00")
endtime=UTCDateTime("2016-10-21T00:00:00")

print(starttime,endtime)
# Window length divided by 86400 (seconds per day), truncated to an integer.
nday=int((endtime-starttime)/86400)

2016-10-20T00:00:00.000000Z 2016-10-21T00:00:00.000000Z


In [12]:
# NOTE(review): presumably (longitude, latitude) of the study-area centre in
# degrees - confirm against the cell that uses it.
center = (13.1, 42.825)

## Read INGV Catalog

In [15]:
# Load the INGV catalogue and order its events by the time of their first origin.
catINGV = read_events("./catalog_ingv.xml")
catINGV = Catalog(sorted(catINGV, key=lambda e: e.origins[0].time))

## Read Inventory

In [11]:
# Glob pattern of the station-metadata files to load.
INVE='./INVENTORY/*.xml'

ii  = glob.glob(INVE)
# Start from an empty inventory and add every matching file to it.
inventory=obspy.Inventory()

for e in ii:
    inventory+=read_inventory(e)



In [12]:
# Unique codes of the stations that have at least one channel matching "*Z".
stalist=set()
for net in inventory.select(channel="*Z"):
    for sta in net:
        stalist.add(sta.code)

### Load Seisbench model

In [13]:
import seisbench.models as sbm

# Two PhaseNet pickers, loaded from the pretrained weight sets named
# "original" and "instance".
picker_pno = sbm.PhaseNet.from_pretrained("original")
picker_pni = sbm.PhaseNet.from_pretrained("instance")

# Move both models to the GPU.
picker_pno.cuda()
picker_pni.cuda()
# Show the description shipped with the "original" weights.
print(picker_pno.weights_docstring)

  model_weights = torch.load(f"{path_pt}")


Original PhaseNet model from Zhu et al. (2018). Originally published under MIT License. Original available at https://github.com/AI4EPS/PhaseNet/tree/master/model/190703-214543 . 

Converted to SeisBench by Jannes Münchmeyer (munchmej@univ-grenoble-alpes.fr) with help from Sacha Lapins, Yiyuan Zhong, and Jun Zhu


## Extract picks from INGV catalog corresponding to arrivals

In [14]:
# Driver loop: for every INGV event, collect the catalogue P/S picks of the
# stations present in that day's inventory, run PhaseNet around them and write
# the accepted ML picks to an NLLoc phase file.

# Model, probability threshold and output sub-directory; constant for the run.
picker = picker_pni
thresholds = 0.05
dirsave = 'PN_IN28_005'

# Phase labels kept from the catalogue, mapped to the column they fill.
_phase_column = {
    'P': 'P pick', 'Pg': 'P pick', 'Pn': 'P pick',
    'S': 'S pick', 'Sg': 'S pick', 'Sn': 'S pick',
}

# The inventory file depends only on the Julian day, so it is parsed once per
# day instead of once per event.
_loaded_julday = None
_station_codes = set()

for curreve in catINGV:
    staall = set()
    ingvpick = []
    data_dict = {}
    event_id_str = str(curreve.resource_id)
    evento_id = event_id_str.split("eventId=")[-1]
    ori = curreve.origins[0]
    arrivals = ori.arrivals
    picks = curreve.picks
    t = ori.time
    lon = ori.longitude
    lat = ori.latitude
    dep = ori.depth

    if t.julday != _loaded_julday:
        inventory = read_inventory('./INVENTORY/inventory_ingv' + str(t.julday) + ".xml")
        stalist = []
        for net in inventory.select(channel="*Z"):
            for sta in net:
                stalist.append(sta.code)
        _station_codes = set(stalist)
        _loaded_julday = t.julday

    # Index the picks by resource id; the first pick wins on a duplicate id,
    # matching the original "first match" lookup.
    picks_by_id = {}
    for p in picks:
        picks_by_id.setdefault(p.resource_id.id, p)

    for ar in arrivals:
        if ar.phase not in _phase_column:
            continue
        pi = picks_by_id.get(ar.pick_id.id) if ar.pick_id is not None else None
        if pi is None:
            # Previously an IndexError; a dangling arrival is now skipped.
            print('No pick found for arrival', ar.resource_id, 'of event', evento_id)
            continue
        sta = pi.waveform_id.station_code
        staall.add(sta)
        if sta in _station_codes:
            ingvpick.append(pi)

    # One entry per station; a phase without a catalogue pick stays NaN.
    for pick in ingvpick:
        station_code = pick.waveform_id.station_code
        pick_time = pick.time
        column = _phase_column.get(pick.phase_hint)
        if column is None:
            continue
        entry = data_dict.setdefault(
            station_code,
            {'Station': station_code, 'P pick': np.nan, 'S pick': np.nan},
        )
        entry[column] = pick_time

    # Convert the dictionary to a list of dictionaries and create a DataFrame
    df_picks = pd.DataFrame(list(data_dict.values()))

    # Now call PhaseNet to find the picks
    dfn = findpicks(df_picks, picker, thresholds, inventory)

    write_event(dfn, ori.time, evento_id, dirsave)



In [15]:
dfn

Unnamed: 0,Station,P pick,S pick,id,P pn,S pn
0,ARVD,2016-10-20T00:16:57.090000Z,2016-10-20T00:17:06.990000Z,IV.ARVD..HHE,2016-10-20T00:16:56.240000Z,2016-10-20T00:17:06.210000Z
1,T1211,2016-10-20T00:16:54.190000Z,2016-10-20T00:17:02.190000Z,IV.T1211..EHE,2016-10-20T00:16:54.090000Z,2016-10-20T00:17:02.180000Z
2,RM33,2016-10-20T00:16:52.900000Z,2016-10-20T00:17:00.320000Z,IV.RM33..EHE,2016-10-20T00:16:52.850000Z,2016-10-20T00:17:00.250000Z
3,TERO,2016-10-20T00:16:52.660000Z,2016-10-20T00:16:59.410000Z,IV.TERO..HHE,2016-10-20T00:16:52.460000Z,2016-10-20T00:16:59.310000Z
4,T1217,2016-10-20T00:16:51.160000Z,2016-10-20T00:16:56.650000Z,IV.T1217..EHE,2016-10-20T00:16:51.110000Z,2016-10-20T00:16:56.700000Z
5,T1215,2016-10-20T00:16:50.840000Z,2016-10-20T00:16:56.100000Z,IV.T1215..EHE,2016-10-20T00:16:50.790000Z,2016-10-20T00:16:55.760000Z
6,SMA1,2016-10-20T00:16:50.890000Z,2016-10-20T00:16:56.000000Z,IV.SMA1..EHE,2016-10-20T00:16:50.850000Z,2016-10-20T00:16:56.020000Z
7,ARRO,2016-10-20T00:16:54.480000Z,,IV.ARRO..EHE,2016-10-20T00:16:54.140000Z,
8,CESI,2016-10-20T00:16:50.070000Z,2016-10-20T00:16:54.580000Z,IV.CESI..HHE,2016-10-20T00:16:50.010000Z,2016-10-20T00:16:54.550000Z
9,ATCC,2016-10-20T00:16:54.540000Z,,IV.ATCC..EHE,2016-10-20T00:16:54.490000Z,
