In [2]:
import os
import gc
import glob
import h5py
import numpy as np

from obspy import UTCDateTime
from obspy.taup import TauPyModel
from obspy.core.event import Catalog
from obspy.clients.fdsn import Client
from obspy.geodetics.base import locations2degrees, degrees2kilometers

# from das_util import next_power_of_2
# from das_util import fk_filter_2cones

In [10]:
def ak_catalog(t1, t2, lat0=59.86, lon0=-151.85, a=-1, b=0.65):
    '''
    Query the IRIS FDSN event service and keep the events that pass a
    simple magnitude-distance (GMM) threshold relative to the DAS network.

    In:  t1, t2: start and ending timestamps
         lat0,lon0: Reference point of DAS network
         a, b: simple GMM parameters; an event is kept when
               mag >= 10 ** (a + b * log10(rad)), where rad is the
               hypocentral distance in km from (lat0, lon0)
    Out: cat : catalog of the events meeting the GMM threshold
         ptimes : absolute first-arrival times (origin time plus the first
                  iasp91 travel time), one entry per event in cat

    Side effects: writes the unfiltered catalog to "example.xml" and prints
    lon, lat, depth (km) and magnitude of every selected event.
    '''
    events = []
    ptimes = []

    # Get the catalog. NOTE: the query carries no geographic or catalog
    # restriction (the `catalog='ak'` filter is disabled), so the selection
    # relies entirely on the GMM threshold below.
    catalog = Client('IRIS').get_events(
        starttime=t1,
        endtime=t2,
#        catalog='ak',
        includeallorigins=True,
        includeallmagnitudes=True)

    catalog.write("example.xml", format="QUAKEML")

    # Build the travel-time model once instead of once per selected event.
    model = TauPyModel(model='iasp91')

    for event in catalog:
        # Events without any origin or magnitude cannot be evaluated.
        if not event.origins or not event.magnitudes:
            continue

        origin = event.origins[0]
        lon = origin['longitude']
        lat = origin['latitude']
        depth_m = origin['depth']
        mag = event.magnitudes[0]['mag']
        t0 = origin['time']
        # Any of these fields may be unset in the returned QuakeML.
        if lon is None or lat is None or depth_m is None or mag is None or t0 is None:
            continue

        dep = depth_m * 1e-3
        distdeg = locations2degrees(lat0, lon0, lat, lon)
        distkm = degrees2kilometers(distdeg)
        rad = np.sqrt(distkm ** 2 + dep ** 2)

        if mag - 10 ** (a + b * np.log10(rad)) < 0:
            continue

        # Clamp to the surface: a negative source depth is not a valid input
        # for the travel-time computation.
        arr = model.get_travel_times(
            source_depth_in_km=max(dep, 0.0),
            distance_in_degree=distdeg)
        if not arr:
            # No arrival predicted; skip so that events and ptimes stay aligned.
            continue

        ptimes.append(t0 + arr[0].time)
        events.append(event)
        print(lon,lat,dep,mag)

    return Catalog(events=events), np.array(ptimes)

In [13]:
# Query window for the catalog check; it ends at 2023-12-31T00:00:00, so
# events on the last day of December are not included.
t1 = UTCDateTime("2023-12-01T00:00:00")
t2 = UTCDateTime("2023-12-31T00:00:00")
# Select events with GMM threshold parameters a=-1.2, b=0.65 (overrides the default a=-1).
cat, ptimes = ak_catalog(t1, t2, a=-1.2, b=0.65)

-152.5977 59.4926 74.3 1.4
-153.4729 59.8152 132.6 3.1
-151.8942 59.9605 62.5 1.1
-151.1005 60.4995 53.7 1.3
-153.0548 59.5766 94.3 1.6
-151.809 59.9649 58.2 2.0
-150.6987 60.9387 45.5 1.9
-151.9061 61.0812 86.9 2.0
-151.8781 59.4866 59.7 1.3
-152.4645 60.1634 93.0 2.2
-152.2586 60.0498 67.7 1.5
-152.7753 59.73 81.3 1.4
-151.972 60.2506 67.4 1.4
-152.2066 59.613 74.7 1.8
-153.0291 59.5296 85.10000000000001 1.6
-151.6023 60.8926 76.2 2.2
-152.3255 60.1692 95.4 1.5
-150.4775 60.0718 63.2 1.4
-152.3852 61.0314 107.9 2.0
-150.9086 58.2769 22.6 2.3
-151.8798 60.4857 78.0 1.4
-151.7136 59.5779 53.7 1.3
-153.0441 60.2373 137.6 1.8
-152.9391 59.323 66.8 1.8
-152.115 59.2201 60.800000000000004 1.5
-152.5176 59.1434 73.9 1.6
-152.6771 60.4836 112.8 1.6
-151.9684 60.4783 75.8 1.4
-151.9325 60.1468 69.0 1.2
-152.2068 59.1704 55.4 1.4
-150.4252 60.0421 50.5 1.4
-153.4263 60.0735 163.3 1.9
-152.7311 60.0911 86.5 1.6
-150.3639 60.622 40.800000000000004 2.0
-151.4047 60.0945 64.9 1.3
-152.7762 60.0598

In [5]:
# Sanity check: compare the number of selected events with the number of arrival times.
len(cat.events), len(ptimes)

(292, 292)

In [4]:
def _first_record(rec_dir, format_part, t):
    '''Return the base name of the first file in rec_dir matching time t, or None.'''
    pattern = os.path.join(rec_dir, t.strftime(format_part))
    # Sort so the choice is deterministic if several files match the pattern.
    matches = sorted(glob.glob(pattern))
    if not matches:
        return None
    return os.path.basename(matches[0])


def ak_record_lists(rec_dir, format_part, format_full, times, catalog):
    '''
    Pair every event with the record file containing its first arrival and
    with the record file that starts 60 s earlier (used as noise).

    In:  rec_dir : path to the raw data
         format_part/full: file name format; format_part is used as a glob
                           pattern, format_full to parse a file's start time
         times: event first arrival time
         catalog: events, in the same order as times
    Out: elist : list of files records events
         nlist : list of files of noises
         cat : catalog of the events for which both files were found

    Events whose arrival, earthquake or noise file is missing are skipped, so
    the three outputs always have the same length and order.
    '''
    elist = []
    nlist = []
    events = []

    for t_arrival, eve in zip(times, catalog):
        print(os.path.join(rec_dir, t_arrival.strftime(format_part)))

        # File whose name falls in the same minute as the arrival.
        arrival_file = _first_record(rec_dir, format_part, t_arrival)
        if arrival_file is None:
            continue

        t_file = UTCDateTime.strptime(arrival_file, format=format_full)
        # If the matched file starts after the arrival, the arrival belongs to
        # the file that starts 60 s earlier.
        if (t_arrival - t_file) > 0:
            t_eq = t_file
        else:
            t_eq = t_file - 60
        t_no = t_eq - 60

        eq_file = _first_record(rec_dir, format_part, t_eq)
        no_file = _first_record(rec_dir, format_part, t_no)
        if eq_file is None or no_file is None:
            # Incomplete pair: skip instead of raising IndexError.
            continue

        elist.append(os.path.join(rec_dir, eq_file))
        nlist.append(os.path.join(rec_dir, no_file))
        events.append(eve)

    return elist, nlist, Catalog(events=events)

In [5]:
def taper_axis2(arr):
    '''
    Multiply every row of a 2-D array by a Hann window.

    In:  arr : 2-D array; the window length equals its second dimension
    Out: array of the same shape, tapered along the second axis
    '''
    # Unpacking into exactly two names keeps the 2-D requirement.
    _, n_cols = arr.shape
    return arr * np.hanning(n_cols)

In [6]:
def dataprep_akdas(outdir, seis_arrays, rec_dirs, format_part, format_full, times, catalog):
    '''
    Build one HDF5 file of earthquake records per DAS array.

    In:  outdir : directory the output files are written to
         seis_arrays : array names, used in the output file names
         rec_dirs : raw-data directory of each array
         format_part/full : per-array file name formats (see ak_record_lists)
         times : event first arrival times
         catalog : events, in the same order as times
    Out: None. For every array a file "<array>till<YYYY_MM_DD>.hdf5" is
         written with two datasets of shape (n_events, 7500, 1500):
         "raw_quake" (normalised records) and "fk_quake" (FK-filtered).

    seis_arrays, rec_dirs, format_part and format_full are parallel lists
    and are zipped together, so the shortest one sets the number of arrays.
    '''
    # The notebook-level `from das_util import fk_filter_2cones` is commented
    # out; use a notebook-level definition when there is one and fall back to
    # importing the helper here, so a fresh kernel does not hit a NameError.
    try:
        fk_filter = fk_filter_2cones
    except NameError:
        from das_util import fk_filter_2cones as fk_filter

    # Window read from every file: RawData[:1500, 100:7600].
    # Presumably rows are time samples and columns are channels - TODO confirm.
    n_rows = 1500
    col_start = 100
    n_cols = 7500

    for rec_dir, seis_array, f_part, f_full in zip(rec_dirs, seis_arrays, format_part, format_full):
        # The noise-file list is returned but not used here.
        elist, _, cat = ak_record_lists(rec_dir, f_part, f_full, times, catalog)
        print(len(cat), len(elist))
        # NOTE(review): fixed file name, so every array overwrites the previous one.
        cat.write("ak_2024Jan.xml", format="QUAKEML")

        all_quake = np.zeros((len(elist), n_cols, n_rows), dtype=np.float32)
        raw_quake = np.zeros((len(elist), n_cols, n_rows), dtype=np.float32)

        for i, eq_file in enumerate(elist):
            with h5py.File(eq_file, 'r') as f:
                time_data = f['Acquisition']['Raw[0]']['RawData'][:n_rows, col_start:col_start + n_cols]

            # Standardise the record; a constant record is only demeaned so
            # that a zero standard deviation cannot produce NaN/inf.
            centered = time_data - np.mean(time_data)
            scale = np.std(time_data)
            time_data = centered / scale if scale > 0 else centered

            # Files shorter than the window in either dimension are zero-padded.
            got_rows, got_cols = time_data.shape
            raw_quake[i, :got_cols, :got_rows] = time_data.T

            # %% Use FK filter
            filt_cplx, _, _ = fk_filter(time_data,
                                        w1=0.005,
                                        w2=0.25,
                                        cone1=True,
                                        cone2=True)
            time_data = filt_cplx.real

            got_rows, got_cols = time_data.shape
            all_quake[i, :got_cols, :got_rows] = time_data.T

        today = UTCDateTime.now().strftime('%Y_%m_%d')
        out_path = os.path.join(outdir, seis_array + 'till' + today + '.hdf5')
        with h5py.File(out_path, 'w') as f:
            f.create_dataset("fk_quake", data=all_quake)
            f.create_dataset("raw_quake", data=raw_quake)

In [None]:
## Main cell: generate the training dataset for each DAS array
# seis_arrays, rec_dirs, format_part and format_full are parallel lists with
# one entry per array; dataprep_akdas zips them together.
seis_arrays = ['KKFLS', 'TERRA']
rec_dirs = ['/mnt/qnap/KKFL-S_FIberA_25Hz/', '/mnt/qnap/TERRA_FiberA_25Hz/']
# Alternative configuration: process the TERRA array only.
# seis_arrays = ['TERRA']
# rec_dirs = ['/mnt/qnap/TERRA_FiberA_25Hz/']
# format_part is expanded with strftime and used as a glob pattern ('??' matches
# the two-character seconds field); format_full parses the start time back out
# of a matched file name.
format_part = ['decimator2_%Y-%m-%d_%H.%M.??_UTC.h5', 'decimator2_%Y-%m-%d_%H.%M.??_UTC.h5']
format_full = ['decimator2_%Y-%m-%d_%H.%M.%S_UTC.h5', 'decimator2_%Y-%m-%d_%H.%M.%S_UTC.h5']
# Directory the output .hdf5 files are written to.
outdir = '/mnt/disk2/qibin_data'

# NOTE: t1, t2, cat and ptimes replace the values assigned by the earlier
# catalog-query cell (December 2023 window).
t1 = UTCDateTime("2023-07-01T00:00:00")
t2 = UTCDateTime("2023-07-04T00:00:00")
cat, ptimes = ak_catalog(t1, t2, a=-1.2, b=0.65)
dataprep_akdas(outdir, seis_arrays, rec_dirs, format_part, format_full, ptimes, cat)