In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import h5py
import obspy
from obspy import Stream
from obspy.clients.fdsn import Client
from pnwstore.mseed import WaveformClient
from obspy import Stream
from datetime import timedelta
from tqdm import tqdm
import csv
import random
from itertools import islice
import time 

In [15]:
#--------------Initiate clients and constants----------------#
# Define clients
client_iris = Client("IRIS")
client_ncedc = Client("NCEDC")
client_waveform = WaveformClient()

# Define constants
sampling_rate = 100  # Hz
pre_arrival_time = 50
window_length = 150

# Load the arrival table and define the output file names
assoc_df = pd.read_csv('/wd1/hbito_data/data/datasets_all_regions/arrival_assoc_origin_2010_2015_reloc_cog_ver3.csv', index_col=0)
output_waveform_file = "/wd1/hbito_data/data/datasets_all_regions/waveforms_HH_BH_on_the_fly_bulk_backup2.h5"
output_metadata_file = "/wd1/hbito_data/data/datasets_all_regions/metadata_HH_BH_on_the_fly_bulk_backup2.csv"
error_log_file = "/wd1/hbito_data/data/datasets_all_regions/save_errors_on_the_fly_bulk_backup2.csv"

# Preprocess dataframe
assoc_df[['network', 'station']] = assoc_df['sta'].str.split('.', expand=True)
assoc_df['event_id'] = 'ev' + assoc_df['otime'].astype(str).str.replace('.', '_')


  assoc_df['event_id'] = 'ev' + assoc_df['otime'].astype(str).str.replace('.', '_')


In [3]:
# Define the function to reorder the traces in a stream
def order_traces(stream: Stream, expected_len: int) -> np.ndarray:
    """
    Converts an ObsPy stream into a (3, expected_len) numpy array, 
    consistently ordered as [Z, E, N].

    Parameters:
    - stream: ObsPy Stream containing cleaned traces (padded to expected_len)
    - expected_len: Target length of each waveform trace

    Returns:
    - data_array: np.ndarray of shape (3, expected_len)
    """
    # Fixed component order: Z → 0, E → 1, N → 2
    comp_to_index = {"Z": 0, "E": 1, "N": 2}
    data_list = [np.zeros(expected_len) for _ in range(3)]  # Default to zeros

    for tr in stream:
        chan_suffix = tr.stats.channel[-1]
        if chan_suffix in comp_to_index:
            idx = comp_to_index[chan_suffix]
            data_list[idx] = tr.data  

    return np.vstack(data_list)  # Shape: (3, expected_len)

In [4]:
#--------------Gather Station Information----------------#
# Obtain unique network-station combinations
unique_ns = assoc_df.sta.unique()

# Define the start and end times for requesting station information
starttime_bulk = obspy.UTCDateTime("2010-01-01T00:00:00")
endtime_bulk = obspy.UTCDateTime("2015-12-31T23:59:59")

# Make a list of stations for bulk request 
bulk =[]
for u_ns in unique_ns:
    n,s = u_ns.split('.')

    for bi in ['EH?', 'BH?', 'HH?']:
        line = (n, s, '*', bi, starttime_bulk, endtime_bulk)
        bulk.append(line)

# Make a bulk request 
inv = client_iris.get_stations_bulk(bulk, level='channel')
time.sleep(0.2)

In [5]:
#--------------Gather Waveform Information----------------#
# Obtain uniquee otime-network-station combinations
unique_n_s_otime = assoc_df.drop_duplicates(['event_id', 'network', 'station'],keep='first').reset_index(drop=True)
unique_n_s_otime


Unnamed: 0,sta,pick_time,arid,iphase,prob,orid,phase,timeres,slatitude,slongitude,...,nass,p_picks,s_picks,rms,nsphz,gap,algorithm,network,station,event_id
0,UW.PCMD,1.262305e+09,0,P,0.680,0,P,0.049,46.888962,-122.301483,...,7,2,5,1.081,5.0,235.831208,genie,UW,PCMD,ev1262304917_262282
1,UW.RVW,1.262305e+09,1,P,0.680,0,P,1.264,46.149750,-122.742996,...,7,2,5,1.081,5.0,235.831208,genie,UW,RVW,ev1262304917_262282
2,UW.GNW,1.262305e+09,3,S,0.680,0,S,2.402,47.564130,-122.824980,...,7,2,5,1.081,5.0,235.831208,genie,UW,GNW,ev1262304917_262282
3,PB.B013,1.262305e+09,4,S,0.680,0,S,-0.651,47.813000,-122.910797,...,7,2,5,1.081,5.0,235.831208,genie,PB,B013,ev1262304917_262282
4,PB.B943,1.262305e+09,5,S,0.680,0,S,-0.511,47.813202,-122.911301,...,7,2,5,1.081,5.0,235.831208,genie,PB,B943,ev1262304917_262282
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
690483,7D.J11D,1.435102e+09,1004326,P,0.694,63886,P,-0.336,43.541599,-126.368599,...,9,4,5,0.447,5.0,247.683119,genie,7D,J11D,ev1435101498_841147
690484,7D.J19D,1.435102e+09,1004327,P,0.694,63886,P,-0.419,44.179001,-126.271202,...,9,4,5,0.447,5.0,247.683119,genie,7D,J19D,ev1435101498_841147
690485,7D.J10D,1.435102e+09,1004328,P,0.694,63886,P,-0.505,43.348499,-125.545097,...,9,4,5,0.447,5.0,247.683119,genie,7D,J10D,ev1435101498_841147
690486,7D.J27D,1.435102e+09,1004329,P,0.694,63886,P,0.915,44.848900,-126.308296,...,9,4,5,0.447,5.0,247.683119,genie,7D,J27D,ev1435101498_841147


In [6]:
# Define functions to append entries for the bulk request
def append_bulk_lists_chunks(bulk_waveforms, n, s, bi, trace_start, trace_end, day_end, next_day_start):
    """
    Append waveform requests to the bulk list based on the availability of HH? and BH? channels. If the stream runs over the midnight, split the request into two.
    """
    if day_end > trace_end:
        # If the trace end is within the same day, we can use HH?
        bulk_waveforms.append((n, s, '*', bi, trace_start, trace_end))
    else:
        # If the trace end goes beyond the day, we need to adjust
        bulk_waveforms.append((n, s, '*', bi, trace_start, day_end))
        bulk_waveforms.append((n, s, '*', bi, next_day_start, trace_end))
    return bulk_waveforms

In [7]:
def append_bulk_lists(bulk_waveforms, n, s, bi, trace_start, trace_end):
    """
    Append waveform requests to the bulk list based on the availability of HH? and BH? channels.
    """
    bulk_waveforms.append((n, s, '*', bi, trace_start, trace_end))

    return bulk_waveforms

In [None]:
# compose 
batches_bulk_waveforms_chunks =[]
batches_bulk_waveforms_chunks_ncedc =[]

batches_bulk_waveforms = []
num_batches = 10
len_batches = len(unique_n_s_otime) // num_batches

count_EH_pairs = 0

# Constants
sampling_rate = 100  # Hz
pre_arrival_time = 50
window_length = 150

for i in tqdm(range(0, num_batches+1)):
    bulk_waveforms_chunks = []
    bulk_waveforms_chunks_ncedc = []
    bulk_waveforms = []
    time.sleep(0.2)

    for index, u_ns in islice(unique_n_s_otime.iterrows(), i*len_batches, (i + 1) * len_batches):
        n,s = u_ns['network'], u_ns['station']

        otime = u_ns['otime']
        pick_time = u_ns['pick_time']
        trace_start = obspy.UTCDateTime(otime - pre_arrival_time)
        trace_end = trace_start + window_length

        day_end = obspy.UTCDateTime(trace_start.date + timedelta(days=1))-1e-6
        next_day_start = obspy.UTCDateTime(trace_start.date + timedelta(days=1))

        # print(trace_start, trace_end)

        sta = inv.select(network=n, station=s, time=pick_time)

        has_Z = bool(sta.select(channel='??Z'))
        has_HH = bool(sta.select(channel='HH?'))
        has_BH = bool(sta.select(channel='BH?'))

        if not has_Z or not (has_HH or has_BH):
            count_EH_pairs += 1
            # print("count_EH_pairs", count_EH_pairs)
            continue
        
        if has_HH:
            if n in ['NC', 'BK']:
                bulk_waveforms_chunks_ncedc = append_bulk_lists_chunks(bulk_waveforms_chunks_ncedc, n, s, 'HH?', trace_start, trace_end, day_end, next_day_start)
            else:
                bulk_waveforms_chunks = append_bulk_lists_chunks(bulk_waveforms_chunks, n, s, 'HH?', trace_start, trace_end, day_end, next_day_start)
            
            bulk_waveforms = append_bulk_lists(bulk_waveforms, n, s, 'HH?', trace_start, trace_end)

        else:
            if n in ['NC', 'BK']:
                bulk_waveforms_chunks_ncedc = append_bulk_lists_chunks(bulk_waveforms_chunks_ncedc, n, s, 'BH?', trace_start, trace_end, day_end, next_day_start)
            else:
                bulk_waveforms_chunks = append_bulk_lists_chunks(bulk_waveforms_chunks, n, s, 'BH?', trace_start, trace_end, day_end, next_day_start)
            
            bulk_waveforms = append_bulk_lists(bulk_waveforms, n, s, 'BH?', trace_start, trace_end)

    batches_bulk_waveforms_chunks.append(bulk_waveforms_chunks)
    batches_bulk_waveforms_chunks_ncedc.append(bulk_waveforms_chunks_ncedc)
    batches_bulk_waveforms.append(bulk_waveforms)


  0%|          | 0/11 [00:00<?, ?it/s]

100%|██████████| 11/11 [06:21<00:00, 34.68s/it]


In [16]:
#--------------Create Waveform Datasets in batches----------------#
# Find entries that have already been processed
processed_keys = set()
if os.path.exists(output_metadata_file):
    processed_df = pd.read_csv(output_metadata_file)
    processed_keys = set(zip(processed_df['trace_start_time'], processed_df['station_network_code'], processed_df['station_code']))
    print(f"Loaded {len(processed_keys)} processed entries.")

In [17]:
# Open output files
h5f = h5py.File(output_waveform_file, "a")
meta_out = open(output_metadata_file, "a")
write_header = os.stat(output_metadata_file).st_size == 0 if os.path.exists(output_metadata_file) else True

In [None]:
# h5f.close()
# meta_out.close()

NameError: name 'h5f' is not defined

In [18]:
fieldnames = ['event_id', 'source_origin_time', 'source_latitude_deg', 'source_longitude_deg',
              'source_type', 'source_depth_km', 'preferred_source_magnitude', 'preferred_source_magnitude_type',
              'preferred_source_magnitude_uncertainty', 'source_depth_uncertainty_km', 'source_horizontal_uncertainty_km',
              'station_network_code', 'station_channel_code', 'station_code', 'station_location_code',
              'station_latitude_deg', 'station_longitude_deg', 'station_elevation_m', 'trace_name',
              'trace_sampling_rate_hz', 'trace_start_time', 'trace_S_arrival_sample', 'trace_P_arrival_sample',
              'trace_S_arrival_uncertainty_s', 'trace_P_arrival_uncertainty_s', 'trace_P_polarity',
              'trace_S_onset', 'trace_P_onset', 'trace_snr_db', 'source_type_pnsn_label',
              'source_local_magnitude', 'source_local_magnitude_uncertainty', 'source_duration_magnitude',
              'source_duration_magnitude_uncertainty', 'source_hand_magnitude', 'trace_missing_channel', 'trace_has_offset']

In [19]:
meta_writer = csv.DictWriter(meta_out, fieldnames=fieldnames)

if write_header:
    meta_writer.writeheader()

In [20]:
sampling_rate = 100  # Hz
window_length = 150  # seconds
expected_len = int(sampling_rate * window_length)

i_iter = 0

# for i in range(len(batches_bulk_waveforms)):
for i in range(1):
    print("Batch",i)
    batch_chunk = batches_bulk_waveforms_chunks[2]
    batch_chunk_ncedc = batches_bulk_waveforms_chunks_ncedc[2]
    batch = batches_bulk_waveforms[2]

    save_errors = []


    st = Stream()

    for j in range(len(batch_chunk)):
        n, s, loc, bi, trace_start_time, trace_end_time = batch_chunk[j]
        try: 
            st1 = client_waveform.get_waveforms(network=n, station=s, location=loc, channel=bi,
                                                starttime=trace_start_time, endtime=trace_end_time)
            st.extend(st1)
        except Exception as e:
            print(f"Error fetching waveforms for {n}.{s} {bi} from {trace_start_time} to {trace_end_time}: {e}")
            # Write error immediately
            continue
    print('Finished downloading from WaveformClient')

    for j in range(len(batch_chunk_ncedc)):
        n, s, loc, bi, trace_start_time, trace_end_time = batch_chunk_ncedc[j]
        try: 
            st2 = client_ncedc.get_waveforms(network=n, station=s, location=loc, channel=bi,
                                                 starttime=trace_start_time, endtime=trace_end_time)
            time.sleep(0.2)
            st.extend(st2)
        except Exception as e:
            print(f"Error fetching waveforms for {n}.{s} {bi} from {trace_start_time} to {trace_end_time}: {e}")
            # Write error immediately
            continue
    print('Finished downloading NCEDC')    

    

    # print("Requesting waveforms.")
    # if len(batch_chunk) != 0:
    #     st1 = client_waveform.get_waveforms_bulk(batch_chunk)
    #     time.sleep(0.2) # Stop the execution to avoid making too many requests to the server
    # if len(batch_chunk_ncedc) != 0:
    #     st2 = client_ncedc.get_waveforms_bulk(batch_chunk_ncedc)
    #     time.sleep(0.2) # Stop the execution to avoid making too many requests to the server
    # if len(st1) == 0 and len(st2) == 0:
    #     print(f"Batch {i+1} has no waveform requests.")
    #     continue

    time.sleep(0.2) # Stop the execution to avoid making too many requests to the server

    # st = st1.extend(st2) if len(st2) != 0 else st1

    for n_s_time in tqdm(batch[22095:22105]):
        i_iter += 1
        network, station, location, channel, trace_start_time, trace_end_time = n_s_time

        rows_sta  = assoc_df.loc[(assoc_df['sta'] == f"{network}.{station}") & (abs(assoc_df['otime'] - float(trace_start_time + timedelta(seconds=pre_arrival_time))) < 1)]
        

        p_arrival = rows_sta[rows_sta['iphase'] == 'P']
        s_arrival = rows_sta[rows_sta['iphase'] == 'S']

        key = (str(trace_start_time), network, station)
        if key in processed_keys:
            print(f"Skipping already processed entry: {key}")
            # time.sleep(0.2)
            continue

        # inv_n_s_time = inv.select(network=network, station=station, location=location, channel='*',
        #                            starttime=trace_start_time, endtime=trace_end_time)

        # inv_n_s_time = inv.select(network=network, station=station, location=location, channel='*')
        # print('inv_n_s_time', inv_n_s_time)
        st_n_s = st.select(id=f"{network}.{station}.*.{channel}",)
        # print('st_n_s', st_n_s)

        st_n_s_time = Stream([tr for tr in st_n_s if tr.stats.starttime > (trace_start_time-1) and tr.stats.endtime < (trace_end_time+1)]) # Tolerate the error of 1 second when selecting the traces in the stream for the specific time window
        st_n_s_time.merge(method=0, fill_value='interpolate')
        st_n_s_time.detrend()
        st_n_s_time.resample(sampling_rate)

        cleaned_stream = Stream()
        # print('st_n_s_time', st_n_s_time)
        for tr in st_n_s_time:
            trace_data = tr.data[:expected_len]
            if len(trace_data) < expected_len:
                trace_data = np.pad(trace_data, (0, expected_len - len(trace_data)), mode="constant") # Pads zeros at the end
            tr.data = trace_data
            cleaned_stream.append(tr)

        # print('cleaned_stream', cleaned_stream)

        _cleaned_stream = order_traces(cleaned_stream, expected_len)

        try:
            data = np.stack(_cleaned_stream, axis=0)
    #         data = np.stack([tr.data[:window_length * sampling_rate - 2] for tr in waveform], axis=0)
        except Exception as e:
            # Write error immediately
            file_exists = os.path.exists(error_log_file)
            with open(error_log_file, "a", newline="") as errfile:
                writer = csv.DictWriter(errfile, fieldnames=['i_iter', 'network', 'station', 'starttime', 'endtime', 'stage', 'error'])
                if not file_exists:
                    writer.writeheader()
                writer.writerow({'i_iter': i_iter, 'network': network, 'station': station, 'starttime': trace_start_time, 'endtime': trace_end_time, 'stage': 'metadata_write', 'error': str(e)})
            continue

        bucket = str(random.randint(0, 10))
        
        
        try:
            dset_path = f"/data/{bucket}"
            if dset_path not in h5f:
                h5f.create_dataset(dset_path, data=np.expand_dims(data, axis=0), maxshape=(None, *data.shape), chunks=True, dtype='float32')
                dataset_index = 0
            else:
                dset = h5f[dset_path]
                dataset_index = dset.shape[0]
                dset.resize((dataset_index + 1), axis=0)
                dset[dataset_index] = data
        except Exception as e:
            print(f"Error writing to HDF5 for bucket {bucket}: {e}")
            # Write error immediately
            file_exists = os.path.exists(error_log_file)
            with open(error_log_file, "a", newline="") as errfile:
                writer = csv.DictWriter(errfile, fieldnames=['i_iter', 'network', 'station', 'starttime', 'endtime', 'stage', 'error'])
                if not file_exists:
                    writer.writeheader()
                writer.writerow({'i_iter': i_iter, 'network': network, 'station': station, 'starttime': trace_start_time, 'endtime': trace_end_time, 'stage': 'metadata_write', 'error': str(e)})
            continue

        trace_name = f"{bucket}${dataset_index},:{data.shape[0]},:{data.shape[1]}"

        print(network, station, location, channel, trace_start_time, trace_end_time)
        # print(rows_sta)
        # print(rows_sta['lat'].iloc[0])
        # print(rows_sta['lat'].iloc[0])
        # print(rows_sta['lon'].iloc[0])
        # print(rows_sta['depth'].iloc[0])
        # print(s_arrival['pick_time'].iloc[0] if not s_arrival.empty else None)
        # print(inv_n_s_time[0][0].latitude)
        print(cleaned_stream[0].stats.channel[:-1])


        try:
            row = {
                'event_id': rows_sta['event_id'].iloc[0],
                'source_origin_time': rows_sta['otime'].iloc[0],
                'source_latitude_deg': rows_sta['lat'].iloc[0],
                'source_longitude_deg': rows_sta['lon'].iloc[0],
                'source_type': "earthquake",
                'source_depth_km': rows_sta['depth'].iloc[0],
                'preferred_source_magnitude': None,
                'preferred_source_magnitude_type': None,
                'preferred_source_magnitude_uncertainty': None,
                'source_depth_uncertainty_km': None,
                'source_horizontal_uncertainty_km': None,
                'station_network_code': network,
                'station_channel_code': cleaned_stream[0].stats.channel[:-1],
                'station_code': station,
                'station_location_code': "",
                'station_latitude_deg': None,
                'station_longitude_deg': None,
                'station_elevation_m': None,
                'trace_name': trace_name,
                'trace_sampling_rate_hz': sampling_rate,
                'trace_start_time': trace_start_time,
                'trace_S_arrival_sample': int((s_arrival['pick_time'].iloc[0] - (rows_sta['otime'].iloc[0] - pre_arrival_time)) * sampling_rate)if not s_arrival.empty else None,
                'trace_P_arrival_sample': int((p_arrival['pick_time'].iloc[0] - (rows_sta['otime'].iloc[0] - pre_arrival_time)) * sampling_rate) if not p_arrival.empty else None,
                'trace_S_arrival_uncertainty_s': None,
                'trace_P_arrival_uncertainty_s': None,
                'trace_P_polarity': None,
                'trace_S_onset': "impulsive"if not s_arrival.empty else None,
                'trace_P_onset': "impulsive" if not p_arrival.empty else None,
                'trace_snr_db': None,
                'source_type_pnsn_label': None,
                'source_local_magnitude': None,
                'source_local_magnitude_uncertainty': None,
                'source_duration_magnitude': None,
                'source_duration_magnitude_uncertainty': None,
                'source_hand_magnitude': None,
                'trace_missing_channel': "",
                'trace_has_offset': None
            }
            meta_writer.writerow(row)
            meta_out.flush()
        except Exception as e:
            print(f"Error writing metadata for {station}/{trace_start_time}: {e}")
            # Write error immediately
            file_exists = os.path.exists(error_log_file)
            with open(error_log_file, "a", newline="") as errfile:
                writer = csv.DictWriter(errfile, fieldnames=['i_iter', 'network', 'station', 'starttime', 'endtime', 'stage', 'error'])
                if not file_exists:
                    writer.writeheader()
                writer.writerow({'i_iter': i_iter, 'network': network, 'station': station, 'starttime': trace_start_time, 'endtime': trace_end_time, 'stage': 'metadata_write', 'error': str(e)})
            continue
            

h5f.close()
meta_out.close()

Batch 0
Finished downloading from WaveformClient
Finished downloading NCEDC


 10%|█         | 1/10 [00:01<00:13,  1.51s/it]

NC KBO * HH? 2012-04-25T21:27:54.608850Z 2012-04-25T21:30:24.608850Z
HH


 20%|██        | 2/10 [00:02<00:07,  1.08it/s]

7D G03A * BH? 2012-04-25T21:27:54.608850Z 2012-04-25T21:30:24.608850Z
BH


 30%|███       | 3/10 [00:02<00:05,  1.37it/s]

NC KMPB * HH? 2012-04-25T21:55:33.572922Z 2012-04-25T21:58:03.572922Z
HH


 40%|████      | 4/10 [00:03<00:03,  1.57it/s]

NC KMR * HH? 2012-04-25T21:55:33.572922Z 2012-04-25T21:58:03.572922Z
HH


 50%|█████     | 5/10 [00:03<00:02,  1.69it/s]

UW OFR * BH? 2012-04-25T22:22:04.379107Z 2012-04-25T22:24:34.379107Z
BH


 60%|██████    | 6/10 [00:04<00:02,  1.80it/s]

7D G03A * BH? 2012-04-25T23:49:21.898869Z 2012-04-25T23:51:51.898869Z
BH


 70%|███████   | 7/10 [00:04<00:01,  1.84it/s]

BK JCC * HH? 2012-04-25T23:49:21.898869Z 2012-04-25T23:51:51.898869Z
HH


 80%|████████  | 8/10 [00:05<00:01,  1.87it/s]

NC KMPB * HH? 2012-04-25T23:49:21.898869Z 2012-04-25T23:51:51.898869Z
HH


 80%|████████  | 8/10 [00:05<00:01,  1.44it/s]

CN VGZ * HH? 2012-04-24T23:59:08.908738Z 2012-04-25T00:01:38.908738Z





IndexError: list index out of range

In [None]:
total_num_waveforms = 0
with h5py.File(output_waveform_file, 'r') as f:
    print("Keys in HDF5 file:")
    for key in f.keys():
        print(f" - {key}")
    print(f['data'].keys())

    waveforms_group = f['data']

    for i, key in enumerate(list(waveforms_group.keys())[:]):
                waveform = waveforms_group[key][:]
                print(f" - {key}: shape={waveform.shape}, min={waveform.min():.2f}, max={waveform.max():.2f}")
                total_num_waveforms += waveform.shape[0]

    # Plot a sample waveform if matplotlib is available
    if list(waveforms_group.keys()):
        sample_key = list(waveforms_group.keys())[0]
        sample_waveform = waveforms_group[sample_key][:]

        print('samplewaveform shape:', sample_waveform[0].shape)

        for i in range(len(sample_waveform[0])):
            plt.figure(figsize=(12, 4))
            y = sample_waveform[0,i,:]
            x = np.arange(len(y))
            plt.plot(x, y)
            plt.title(f"Sample Waveform: {sample_key}, Channel: {i}")
            plt.xlabel("Samples")
            plt.ylabel("Amplitude")
            plt.tight_layout()
            plt.show()
        

print(f"Total number of waveforms processed: {total_num_waveforms}")
    

BlockingIOError: [Errno 11] Unable to synchronously open file (unable to lock file, errno = 11, error message = 'Resource temporarily unavailable')

In [186]:
metadata = pd.read_csv("/wd1/hbito_data/data/datasets_all_regions/metadata_HH_BH_on_the_fly_bulk.csv")
metadata.iloc[50:70][['event_id','source_origin_time','source_latitude_deg','source_longitude_deg','source_depth_km','station_network_code','station_channel_code','station_code','station_latitude_deg','station_longitude_deg','trace_name','trace_P_arrival_sample', 'trace_S_arrival_sample','trace_P_onset','trace_S_onset','trace_start_time']]

Unnamed: 0,event_id,source_origin_time,source_latitude_deg,source_longitude_deg,source_depth_km,station_network_code,station_channel_code,station_code,station_latitude_deg,station_longitude_deg,trace_name,trace_P_arrival_sample,trace_S_arrival_sample,trace_P_onset,trace_S_onset,trace_start_time
50,ev1262781832_148,1262782000.0,47.3511,-121.62717,65.368,UW,BH,GNW,47.56413,-122.82498,"0$3,:3,:30000",6491.0,7727.0,impulsive,impulsive,2010-01-06T12:43:02.148000Z
51,ev1262781832_148,1262782000.0,47.3511,-121.62717,65.368,CN,BH,HNB,49.2745,-122.5792,"1$4,:3,:30000",8247.0,,impulsive,,2010-01-06T12:43:02.148000Z
52,ev1262795926_72022,1262796000.0,46.43613,-122.36196,15.949,UW,BH,GNW,47.56413,-122.82498,"4$6,:3,:30000",,8538.0,,impulsive,2010-01-06T16:37:56.720220Z
53,ev1262827366_425514,1262827000.0,49.67603,-122.62066,3.255,CN,BH,HNB,49.2745,-122.5792,"2$6,:3,:30000",5747.0,6307.0,impulsive,impulsive,2010-01-07T01:21:56.425514Z
54,ev1262827366_425514,1262827000.0,49.67603,-122.62066,3.255,CN,BH,WSLR,50.1266,-122.9212,"9$6,:3,:30000",5855.0,6476.0,impulsive,impulsive,2010-01-07T01:21:56.425514Z
55,ev1262827366_425514,1262827000.0,49.67603,-122.62066,3.255,UW,BH,PASS,48.998299,-122.085197,"7$4,:3,:30000",6372.0,7413.0,impulsive,impulsive,2010-01-07T01:21:56.425514Z
56,ev1262846500_554385,1262847000.0,47.61945,-122.29584,17.945,UW,BH,GNW,47.56413,-122.82498,"9$7,:3,:30000",,6277.0,,impulsive,2010-01-07T06:40:50.554385Z
57,ev1262855102_887915,1262855000.0,47.97134,-124.61241,31.8,CN,BH,HNB,49.2745,-122.5792,"8$5,:3,:30000",7950.0,10152.0,impulsive,impulsive,2010-01-07T09:04:12.887915Z
58,ev1262855102_887915,1262855000.0,47.97134,-124.61241,31.8,UW,BH,PASS,48.998299,-122.085197,"8$6,:3,:30000",,10470.0,,impulsive,2010-01-07T09:04:12.887915Z
59,ev1262855714_424391,1262856000.0,47.97078,-124.62573,35.22,CN,BH,HNB,49.2745,-122.5792,"2$7,:3,:30000",7945.0,10130.0,impulsive,impulsive,2010-01-07T09:14:24.424391Z


In [181]:
assoc_df['otime2picktime'] = assoc_df['pick_time'] - assoc_df['otime']
assoc_df['otime2picktime'].head(10)

0     9.917718
1    20.578118
2    16.017718
3    24.739718
4    26.356118
5    26.506118
6    30.798118
7    15.354640
8    15.633040
9    15.645040
Name: otime2picktime, dtype: float64

In [182]:
assoc_df.loc[assoc_df['otime2picktime'].idxmax()]

sta                                     CN.NLLB
pick_time                        1299419418.575
arid                                     141685
iphase                                        S
prob                                      0.658
orid                                      10009
phase                                         S
timeres                                   0.008
slatitude                               49.2271
slongitude                            -123.9882
selevation                                199.0
delta                                988.155904
esaz                                    4.90233
seaz                                  184.90233
lat                                    40.36281
lon                                  -124.97845
depth                                     1.861
otime                         1299419196.083291
nass                                         22
p_picks                                       0
s_picks                                 

In [None]:
batches_bulk_waveforms_chunks[3]

[('CN',
  'PGC',
  '*',
  'BH?',
  2012-10-06T09:48:47.045349Z,
  2012-10-06T09:51:17.045349Z),
 ('CN',
  'VGZ',
  '*',
  'HH?',
  2012-10-06T09:48:47.045349Z,
  2012-10-06T09:51:17.045349Z),
 ('CN',
  'SHB',
  '*',
  'BH?',
  2012-10-06T09:48:47.045349Z,
  2012-10-06T09:51:17.045349Z),
 ('7D',
  'FS06B',
  '*',
  'BH?',
  2012-10-06T10:13:31.047768Z,
  2012-10-06T10:16:01.047768Z),
 ('7D',
  'FS09B',
  '*',
  'BH?',
  2012-10-06T10:13:31.047768Z,
  2012-10-06T10:16:01.047768Z),
 ('7D',
  'FS11B',
  '*',
  'HH?',
  2012-10-06T10:13:31.047768Z,
  2012-10-06T10:16:01.047768Z),
 ('7D',
  'FS14B',
  '*',
  'BH?',
  2012-10-06T10:13:31.047768Z,
  2012-10-06T10:16:01.047768Z),
 ('7D',
  'FS20B',
  '*',
  'HH?',
  2012-10-06T10:13:31.047768Z,
  2012-10-06T10:16:01.047768Z),
 ('7D',
  'FS04B',
  '*',
  'HH?',
  2012-10-06T10:13:31.047768Z,
  2012-10-06T10:16:01.047768Z),
 ('7D',
  'FS19B',
  '*',
  'HH?',
  2012-10-06T10:13:31.047768Z,
  2012-10-06T10:16:01.047768Z)]

In [74]:
batch[22080:22110]

[('NC',
  'KHMB',
  '*',
  'HH?',
  2012-04-25T20:43:13.969929Z,
  2012-04-25T20:45:43.969929Z),
 ('NC',
  'KHBB',
  '*',
  'HH?',
  2012-04-25T20:43:13.969929Z,
  2012-04-25T20:45:43.969929Z),
 ('NC',
  'KRMB',
  '*',
  'HH?',
  2012-04-25T20:43:13.969929Z,
  2012-04-25T20:45:43.969929Z),
 ('NC',
  'KSXB',
  '*',
  'HH?',
  2012-04-25T20:43:13.969929Z,
  2012-04-25T20:45:43.969929Z),
 ('7D',
  'M03A',
  '*',
  'HH?',
  2012-04-25T20:51:54.734134Z,
  2012-04-25T20:54:24.734134Z),
 ('7D',
  'J49A',
  '*',
  'HH?',
  2012-04-25T20:51:54.734134Z,
  2012-04-25T20:54:24.734134Z),
 ('7D',
  'FN08A',
  '*',
  'HH?',
  2012-04-25T20:51:54.734134Z,
  2012-04-25T20:54:24.734134Z),
 ('TA',
  'D03D',
  '*',
  'BH?',
  2012-04-25T21:16:55.052013Z,
  2012-04-25T21:19:25.052013Z),
 ('NC',
  'KMPB',
  '*',
  'HH?',
  2012-04-25T21:27:54.608850Z,
  2012-04-25T21:30:24.608850Z),
 ('NC',
  'KMR',
  '*',
  'HH?',
  2012-04-25T21:27:54.608850Z,
  2012-04-25T21:30:24.608850Z),
 ('BK',
  'JCC',
  '*',
  'HH?

In [79]:
batch[21980:22000]

[('TA',
  'G03D',
  '*',
  'BH?',
  2012-04-24T15:24:43.079715Z,
  2012-04-24T15:27:13.079715Z),
 ('NC',
  'KMR',
  '*',
  'HH?',
  2012-04-24T15:27:33.462465Z,
  2012-04-24T15:30:03.462465Z),
 ('BK',
  'JCC',
  '*',
  'HH?',
  2012-04-24T15:27:33.462465Z,
  2012-04-24T15:30:03.462465Z),
 ('7D',
  'M08A',
  '*',
  'BH?',
  2012-04-24T18:07:29.166191Z,
  2012-04-24T18:09:59.166191Z),
 ('TA',
  'I02D',
  '*',
  'BH?',
  2012-04-24T18:07:29.166191Z,
  2012-04-24T18:09:59.166191Z),
 ('UW',
  'JEDS',
  '*',
  'BH?',
  2012-04-24T18:07:29.166191Z,
  2012-04-24T18:09:59.166191Z),
 ('7D',
  'J25A',
  '*',
  'BH?',
  2012-04-24T18:07:29.166191Z,
  2012-04-24T18:09:59.166191Z),
 ('UW',
  'LRIV',
  '*',
  'BH?',
  2012-04-24T20:48:51.135882Z,
  2012-04-24T20:51:21.135882Z),
 ('7D',
  'J73A',
  '*',
  'BH?',
  2012-04-24T20:48:51.135882Z,
  2012-04-24T20:51:21.135882Z),
 ('TA',
  'D03D',
  '*',
  'BH?',
  2012-04-25T01:19:42.772515Z,
  2012-04-25T01:22:12.772515Z),
 ('7D',
  'FN07A',
  '*',
  'HH?

In [59]:
frac = 0.9
window = 20
adjust = -90
batch_test = batches_bulk_waveforms_chunks[2][int(len(batches_bulk_waveforms_chunks[2])*frac)+adjust:int(len(batches_bulk_waveforms_chunks[2])*frac)+window+adjust]
batch_test

[('7D',
  'M08A',
  '*',
  'BH?',
  2012-04-24T11:04:49.329895Z,
  2012-04-24T11:07:19.329895Z),
 ('TA',
  'J01D',
  '*',
  'BH?',
  2012-04-24T11:04:49.329895Z,
  2012-04-24T11:07:19.329895Z),
 ('TA',
  'K02D',
  '*',
  'BH?',
  2012-04-24T11:04:49.329895Z,
  2012-04-24T11:07:19.329895Z),
 ('7D',
  'G03A',
  '*',
  'BH?',
  2012-04-24T14:21:07.424276Z,
  2012-04-24T14:23:37.424276Z),
 ('TA',
  'K02D',
  '*',
  'BH?',
  2012-04-24T14:21:07.424276Z,
  2012-04-24T14:23:37.424276Z),
 ('TA',
  'L02D',
  '*',
  'HH?',
  2012-04-24T14:21:07.424276Z,
  2012-04-24T14:23:37.424276Z),
 ('TA',
  'J01D',
  '*',
  'BH?',
  2012-04-24T15:01:32.684680Z,
  2012-04-24T15:04:02.684680Z),
 ('TA',
  'K02D',
  '*',
  'BH?',
  2012-04-24T15:01:32.684680Z,
  2012-04-24T15:04:02.684680Z),
 ('UW',
  'JEDS',
  '*',
  'BH?',
  2012-04-24T15:01:32.684680Z,
  2012-04-24T15:04:02.684680Z),
 ('UO',
  'DBO',
  '*',
  'BH?',
  2012-04-24T15:01:32.684680Z,
  2012-04-24T15:04:02.684680Z),
 ('TA',
  'G03D',
  '*',
  'BH?

In [101]:
frac = 0.9
window = 20
adjust = -20
batch_test = batches_bulk_waveforms_chunks[2][int(len(batches_bulk_waveforms_chunks[2])*frac)+adjust:int(len(batches_bulk_waveforms_chunks[2])*frac)+window+adjust]
batch_test

[('7D',
  'M03A',
  '*',
  'HH?',
  2012-04-25T20:51:54.734134Z,
  2012-04-25T20:54:24.734134Z),
 ('7D',
  'J49A',
  '*',
  'HH?',
  2012-04-25T20:51:54.734134Z,
  2012-04-25T20:54:24.734134Z),
 ('7D',
  'FN08A',
  '*',
  'HH?',
  2012-04-25T20:51:54.734134Z,
  2012-04-25T20:54:24.734134Z),
 ('TA',
  'D03D',
  '*',
  'BH?',
  2012-04-25T21:16:55.052013Z,
  2012-04-25T21:19:25.052013Z),
 ('7D',
  'G03A',
  '*',
  'BH?',
  2012-04-25T21:27:54.608850Z,
  2012-04-25T21:30:24.608850Z),
 ('UW',
  'OFR',
  '*',
  'BH?',
  2012-04-25T22:22:04.379107Z,
  2012-04-25T22:24:34.379107Z),
 ('7D',
  'G03A',
  '*',
  'BH?',
  2012-04-25T23:49:21.898869Z,
  2012-04-25T23:51:51.898869Z),
 ('CN',
  'VGZ',
  '*',
  'HH?',
  2012-04-24T23:59:08.908738Z,
  2012-04-24T23:59:59.999999Z),
 ('CN',
  'VGZ',
  '*',
  'HH?',
  2012-04-25T00:00:00.000000Z,
  2012-04-25T00:01:38.908738Z),
 ('CN',
  'SNB',
  '*',
  'HH?',
  2012-04-24T23:59:08.908738Z,
  2012-04-24T23:59:59.999999Z),
 ('CN',
  'SNB',
  '*',
  'HH?',


In [176]:
st_test = client_waveform.get_waveforms(network='TA', station='L02D', location="*", channel="BH?",
                                                starttime=obspy.UTCDateTime('2012-08-19T02:42:41.651371Z'), endtime=obspy.UTCDateTime('2012-08-19T02:45:11.651371Z'))
st_test

3 Trace(s) in Stream:
TA.L02D..BHE | 2012-08-19T02:42:41.650000Z - 2012-08-19T02:45:11.650000Z | 40.0 Hz, 6001 samples
TA.L02D..BHN | 2012-08-19T02:42:41.650000Z - 2012-08-19T02:45:11.650000Z | 40.0 Hz, 6001 samples
TA.L02D..BHZ | 2012-08-19T02:42:41.650000Z - 2012-08-19T02:45:11.650000Z | 40.0 Hz, 6001 samples

In [None]:
st_test = client_waveform.get_waveforms(network='CN', station='PFB', location="*", channel="HH?",
                                                starttime=obspy.UTCDateTime('2012-04-24T23:59:08.908738Z'), endtime=obspy.UTCDateTime('2012-04-24T23:59:59.999999Z'))
st_test

In [117]:
t1 = obspy.UTCDateTime('2012-04-24')

In [119]:
_sdata = client_waveform.get_waveforms(network='CN', station='PFB', channel='HH?', 
                                              year=t1.strftime('%Y'), month=t1.strftime('%m'), 
                                              day=t1.strftime('%d'))
_sdata

0 Trace(s) in Stream:


In [42]:
int(len(batches_bulk_waveforms_chunks)*frac)

3

In [44]:
len(batches_bulk_waveforms_chunks)

11

In [45]:
len(batches_bulk_waveforms_chunks[2])

16880

In [69]:
frac = 0.9  
window = 20
adjust  = -90
batches_bulk_waveforms_chunks_ncedc[2][int(len(batches_bulk_waveforms_chunks_ncedc[2])*frac)+adjust:int(len(batches_bulk_waveforms_chunks_ncedc[2])*frac)+window+adjust]

[('NC',
  'KSXB',
  '*',
  'HH?',
  2012-04-24T14:21:07.424276Z,
  2012-04-24T14:23:37.424276Z),
 ('NC',
  'KBO',
  '*',
  'HH?',
  2012-04-24T14:21:07.424276Z,
  2012-04-24T14:23:37.424276Z),
 ('NC',
  'KRP',
  '*',
  'HH?',
  2012-04-24T14:21:07.424276Z,
  2012-04-24T14:23:37.424276Z),
 ('BK',
  'JCC',
  '*',
  'HH?',
  2012-04-24T14:21:07.424276Z,
  2012-04-24T14:23:37.424276Z),
 ('NC',
  'KHMB',
  '*',
  'HH?',
  2012-04-24T14:21:07.424276Z,
  2012-04-24T14:23:37.424276Z),
 ('NC',
  'KHBB',
  '*',
  'HH?',
  2012-04-24T14:21:07.424276Z,
  2012-04-24T14:23:37.424276Z),
 ('NC',
  'KMR',
  '*',
  'HH?',
  2012-04-24T14:21:07.424276Z,
  2012-04-24T14:23:37.424276Z),
 ('NC',
  'KMR',
  '*',
  'HH?',
  2012-04-24T15:27:33.462465Z,
  2012-04-24T15:30:03.462465Z),
 ('BK',
  'JCC',
  '*',
  'HH?',
  2012-04-24T15:27:33.462465Z,
  2012-04-24T15:30:03.462465Z),
 ('BK',
  'JCC',
  '*',
  'HH?',
  2012-04-25T01:28:13.790263Z,
  2012-04-25T01:30:43.790263Z),
 ('NC',
  'KCT',
  '*',
  'HH?',
  20

In [174]:
inv_test = inv.select(network='TA', station='L02D', time=obspy.UTCDateTime('2012-08-19T02:45:11.651371Z'))

In [175]:
inv_test

Inventory created at 2025-08-09T21:47:51.198600Z
	Created by: IRIS WEB SERVICE: fdsnws-station | version: 1.1.52
		    
	Sending institution: IRIS-DMC (IRIS-DMC)
	Contains:
		Networks (1):
			TA
		Stations (1):
			TA.L02D (Cave Junction, OR, USA)
		Channels (6):
			TA.L02D..BHZ, TA.L02D..BHN, TA.L02D..BHE, TA.L02D..HHZ, 
			TA.L02D..HHN, TA.L02D..HHE

In [82]:
assoc_df.loc[assoc_df['sta'] == 'CN.VGZ']['otime']

120746     1.294234e+09
120752     1.294234e+09
120826     1.294265e+09
120836     1.294265e+09
121132     1.294325e+09
               ...     
1003940    1.435056e+09
1004138    1.435084e+09
1004145    1.435084e+09
1004206    1.435086e+09
1004211    1.435086e+09
Name: otime, Length: 4574, dtype: float64

In [83]:
assoc_df.keys()

Index(['sta', 'pick_time', 'arid', 'iphase', 'prob', 'orid', 'phase',
       'timeres', 'slatitude', 'slongitude', 'selevation', 'delta', 'esaz',
       'seaz', 'lat', 'lon', 'depth', 'otime', 'nass', 'p_picks', 's_picks',
       'rms', 'nsphz', 'gap', 'algorithm', 'network', 'station', 'event_id',
       'otime2picktime'],
      dtype='object')

In [84]:
assoc_df['otime_datetime'] = pd.to_datetime(assoc_df['otime'], unit='s')

In [86]:
assoc_df.loc[(assoc_df['sta'] == 'CN.VGZ')&(assoc_df['otime_datetime'] >= '2012-04-23')&(assoc_df['otime_datetime'] < '2012-04-26')]

Unnamed: 0,sta,pick_time,arid,iphase,prob,orid,phase,timeres,slatitude,slongitude,...,s_picks,rms,nsphz,gap,algorithm,network,station,event_id,otime2picktime,otime_datetime
305530,CN.VGZ,1335201000.0,305530,P,0.678,20993,P,0.058,48.4131,-123.3251,...,9,1.39,9.0,216.325567,genie,CN,VGZ,ev1335201492_102892,7.517108,2012-04-23 17:18:12.102892032
305536,CN.VGZ,1335202000.0,305536,S,0.678,20993,S,-0.547,48.4131,-123.3251,...,9,1.39,9.0,216.325567,genie,CN,VGZ,ev1335201492_102892,13.317108,2012-04-23 17:18:12.102892032
306465,CN.VGZ,1335312000.0,306465,P,0.883,21056,P,0.009,48.4131,-123.3251,...,16,0.564,16.0,82.919992,genie,CN,VGZ,ev1335311998_908738,6.091262,2012-04-24 23:59:58.908737792
306480,CN.VGZ,1335312000.0,306480,S,0.883,21056,S,0.43,48.4131,-123.3251,...,16,0.564,16.0,82.919992,genie,CN,VGZ,ev1335311998_908738,10.781262,2012-04-24 23:59:58.908737792


In [87]:
unique_n_s_otime['otime_datetime'] = pd.to_datetime(unique_n_s_otime['otime'], unit='s')

In [125]:
unique_n_s_otime['pick_time_datetime'] = pd.to_datetime(unique_n_s_otime['pick_time'], unit='s')

In [135]:
unique_n_s_otime.loc[(unique_n_s_otime['sta'] == 'CN.VGZ')&(unique_n_s_otime['otime_datetime'] >= '2012-04-23')&(unique_n_s_otime['otime_datetime'] < '2012-04-26')]

Unnamed: 0,sta,pick_time,arid,iphase,prob,orid,phase,timeres,slatitude,slongitude,...,s_picks,rms,nsphz,gap,algorithm,network,station,event_id,otime_datetime,pick_time_datetime
200877,CN.VGZ,1335201000.0,305530,P,0.678,20993,P,0.058,48.4131,-123.3251,...,9,1.39,9.0,216.325567,genie,CN,VGZ,ev1335201492_102892,2012-04-23 17:18:12.102892032,2012-04-23 17:18:19.620
201499,CN.VGZ,1335312000.0,306465,P,0.883,21056,P,0.009,48.4131,-123.3251,...,16,0.564,16.0,82.919992,genie,CN,VGZ,ev1335311998_908738,2012-04-24 23:59:58.908737792,2012-04-25 00:00:05.000


In [90]:
unique_n_s_otime.loc[(unique_n_s_otime['sta'] == 'TA.D03D')&(unique_n_s_otime['otime_datetime'] >= '2012-04-23')&(unique_n_s_otime['otime_datetime'] < '2012-04-26')]

Unnamed: 0,sta,pick_time,arid,iphase,prob,orid,phase,timeres,slatitude,slongitude,...,p_picks,s_picks,rms,nsphz,gap,algorithm,network,station,event_id,otime_datetime
200787,TA.D03D,1335175000.0,305391,S,0.619,20980,S,-0.078,47.534698,-123.089401,...,4,4,0.061,4.0,291.694673,genie,TA,D03D,ev1335175430_197962,2012-04-23 10:03:50.197961984
200803,TA.D03D,1335188000.0,305413,P,0.701,20983,P,-0.017,47.534698,-123.089401,...,6,6,0.195,6.0,203.766675,genie,TA,D03D,ev1335187604_286832,2012-04-23 13:26:44.286832128
200947,TA.D03D,1335207000.0,305644,P,0.871,20999,P,0.088,47.534698,-123.089401,...,26,25,0.764,25.0,170.774827,genie,TA,D03D,ev1335207264_173474,2012-04-23 18:54:24.173474048
201016,TA.D03D,1335219000.0,305750,P,0.887,21004,P,0.138,47.534698,-123.089401,...,30,29,0.793,29.0,172.847455,genie,TA,D03D,ev1335219479_619015,2012-04-23 22:17:59.619014912
201133,TA.D03D,1335257000.0,305922,P,0.652,21017,P,-0.156,47.534698,-123.089401,...,6,7,0.303,7.0,248.741362,genie,TA,D03D,ev1335256639_793067,2012-04-24 08:37:19.793067008
201228,TA.D03D,1335317000.0,306060,P,0.738,21029,P,0.081,47.534698,-123.089401,...,7,10,0.589,10.0,235.003155,genie,TA,D03D,ev1335316832_772515,2012-04-25 01:20:32.772515072
201246,TA.D03D,1335318000.0,306085,P,0.831,21031,P,0.071,47.534698,-123.089401,...,12,22,0.583,22.0,45.211531,genie,TA,D03D,ev1335317598_448957,2012-04-25 01:33:18.448956928
201444,TA.D03D,1335389000.0,306380,P,0.808,21051,P,0.242,47.534698,-123.089401,...,12,10,0.624,10.0,186.766425,genie,TA,D03D,ev1335388665_052013,2012-04-25 21:17:45.052012800


In [91]:
unique_n_s_otime.loc[(unique_n_s_otime['sta'] == 'UW.LEBA')&(unique_n_s_otime['otime_datetime'] >= '2012-04-23')&(unique_n_s_otime['otime_datetime'] < '2012-04-26')]

Unnamed: 0,sta,pick_time,arid,iphase,prob,orid,phase,timeres,slatitude,slongitude,...,p_picks,s_picks,rms,nsphz,gap,algorithm,network,station,event_id,otime_datetime
201036,UW.LEBA,1335220000.0,305770,P,0.887,21004,P,2.306,46.545898,-123.563202,...,30,29,0.793,29.0,172.847455,genie,UW,LEBA,ev1335219479_619015,2012-04-23 22:17:59.619014912
201410,UW.LEBA,1335369000.0,306323,P,0.651,21047,P,-0.291,46.545898,-123.563202,...,18,10,0.862,10.0,233.474133,genie,UW,LEBA,ev1335368578_971975,2012-04-25 15:42:58.971975168


In [134]:
unique_n_s_otime.loc[(unique_n_s_otime['otime_datetime'] >= '2012-04-24')&(unique_n_s_otime['otime_datetime'] < '2012-04-27')][150:170]

Unnamed: 0,sta,pick_time,arid,iphase,prob,orid,phase,timeres,slatitude,slongitude,...,s_picks,rms,nsphz,gap,algorithm,network,station,event_id,otime_datetime,pick_time_datetime
201211,PB.B003,1335301000.0,306030,P,0.751,21027,P,-0.313,48.062359,-124.140862,...,10,0.692,10.0,115.479275,genie,PB,B003,ev1335300581_135882,2012-04-24 20:49:41.135881984,2012-04-24 20:49:50.728399872
201212,PB.B001,1335301000.0,306031,P,0.751,21027,P,0.177,48.043072,-123.131409,...,10,0.692,10.0,115.479275,genie,PB,B001,ev1335300581_135882,2012-04-24 20:49:41.135881984,2012-04-24 20:49:50.878400000
201213,UW.MCW,1335301000.0,306032,P,0.751,21027,P,-0.197,48.679,-122.8326,...,10,0.692,10.0,115.479275,genie,UW,MCW,ev1335300581_135882,2012-04-24 20:49:41.135881984,2012-04-24 20:49:51.320000000
201214,PB.B004,1335301000.0,306033,P,0.751,21027,P,-0.562,48.201923,-124.42701,...,10,0.692,10.0,115.479275,genie,PB,B004,ev1335300581_135882,2012-04-24 20:49:41.135881984,2012-04-24 20:49:51.788399872
201215,UW.BLN,1335301000.0,306034,P,0.751,21027,P,-0.262,48.006624,-122.972646,...,10,0.692,10.0,115.479275,genie,UW,BLN,ev1335300581_135882,2012-04-24 20:49:41.135881984,2012-04-24 20:49:52.200000000
201216,UW.HDW,1335301000.0,306035,P,0.751,21027,P,-0.714,47.64903,-123.0535,...,10,0.692,10.0,115.479275,genie,UW,HDW,ev1335300581_135882,2012-04-24 20:49:41.135881984,2012-04-24 20:49:56.740000000
201217,7D.J73A,1335301000.0,306036,P,0.751,21027,P,2.497,48.7677,-126.192497,...,10,0.692,10.0,115.479275,genie,7D,J73A,ev1335300581_135882,2012-04-24 20:49:41.135881984,2012-04-24 20:50:13.383399936
201218,UW.SHW,1335301000.0,306037,P,0.751,21027,P,-0.187,46.19364,-122.23492,...,10,0.692,10.0,115.479275,genie,UW,SHW,ev1335300581_135882,2012-04-24 20:49:41.135881984,2012-04-24 20:50:21.660000000
201219,PB.B009,1335314000.0,306048,P,0.632,21028,P,-0.036,48.64867,-123.451172,...,4,0.028,4.0,180.41157,genie,PB,B009,ev1335313506_035775,2012-04-25 00:25:06.035774976,2012-04-25 00:25:09.358400000
201220,PB.B011,1335314000.0,306049,P,0.632,21028,P,0.016,48.649544,-123.448189,...,4,0.028,4.0,180.41157,genie,PB,B011,ev1335313506_035775,2012-04-25 00:25:06.035774976,2012-04-25 00:25:09.418400000


In [137]:
unique_n_s_otime.iloc[201495:201520]

Unnamed: 0,sta,pick_time,arid,iphase,prob,orid,phase,timeres,slatitude,slongitude,...,s_picks,rms,nsphz,gap,algorithm,network,station,event_id,otime_datetime,pick_time_datetime
201495,BK.JCC,1335398000.0,306456,P,0.751,21055,P,-0.079,40.817451,-124.029549,...,6,0.673,6.0,192.986384,genie,BK,JCC,ev1335397811_898869,2012-04-25 23:50:11.898868992,2012-04-25 23:50:31.148392960
201496,PB.B933,1335398000.0,306457,P,0.751,21055,P,0.287,40.060001,-123.969002,...,6,0.673,6.0,192.986384,genie,PB,B933,ev1335397811_898869,2012-04-25 23:50:11.898868992,2012-04-25 23:50:31.458400000
201497,PB.B935,1335398000.0,306458,P,0.751,21055,P,0.531,40.478699,-123.573196,...,6,0.673,6.0,192.986384,genie,PB,B935,ev1335397811_898869,2012-04-25 23:50:11.898868992,2012-04-25 23:50:35.908400128
201498,NC.KMPB,1335398000.0,306462,S,0.751,21055,S,0.974,40.417194,-124.120758,...,6,0.673,6.0,192.986384,genie,NC,KMPB,ev1335397811_898869,2012-04-25 23:50:11.898868992,2012-04-25 23:50:44.210000128
201499,CN.VGZ,1335312000.0,306465,P,0.883,21056,P,0.009,48.4131,-123.3251,...,16,0.564,16.0,82.919992,genie,CN,VGZ,ev1335311998_908738,2012-04-24 23:59:58.908737792,2012-04-25 00:00:05.000000000
201500,PB.B005,1335312000.0,306466,P,0.883,21056,P,-0.369,48.059547,-123.503281,...,16,0.564,16.0,82.919992,genie,PB,B005,ev1335311998_908738,2012-04-24 23:59:58.908737792,2012-04-25 00:00:05.348400128
201501,PB.B006,1335312000.0,306467,P,0.883,21056,P,-0.361,48.0588,-123.500801,...,16,0.564,16.0,82.919992,genie,PB,B006,ev1335311998_908738,2012-04-24 23:59:58.908737792,2012-04-25 00:00:05.358400000
201502,PB.B007,1335312000.0,306468,P,0.883,21056,P,-0.369,48.057575,-123.504112,...,16,0.564,16.0,82.919992,genie,PB,B007,ev1335311998_908738,2012-04-24 23:59:58.908737792,2012-04-25 00:00:05.368400128
201503,PB.B001,1335312000.0,306469,P,0.883,21056,P,-0.289,48.043072,-123.131409,...,16,0.564,16.0,82.919992,genie,PB,B001,ev1335311998_908738,2012-04-24 23:59:58.908737792,2012-04-25 00:00:06.138400000
201504,UW.BLN,1335312000.0,306470,P,0.883,21056,P,-0.675,48.006624,-122.972646,...,16,0.564,16.0,82.919992,genie,UW,BLN,ev1335311998_908738,2012-04-24 23:59:58.908737792,2012-04-25 00:00:06.990899968


In [95]:
type(unique_n_s_otime['otime_datetime'].iloc[0])

pandas._libs.tslibs.timestamps.Timestamp

In [103]:
total_length = 0
for batch in batches_bulk_waveforms:
    total_length += len(batch)

total_length

388999

In [102]:
len(batches_bulk_waveforms)

11

In [104]:
count_EH_pairs

301489

In [105]:
total_length+count_EH_pairs

690488

In [106]:
len(unique_n_s_otime)

690488

In [172]:
frac = 0.40
window = 30
adjust  =-490
batches_bulk_waveforms_chunks[3][int(len(batches_bulk_waveforms_chunks[3])*frac)+adjust:int(len(batches_bulk_waveforms_chunks[3])*frac)+window+adjust]

[('TA',
  'K02D',
  '*',
  'BH?',
  2012-08-18T16:40:38.917900Z,
  2012-08-18T16:43:08.917900Z),
 ('UW',
  'JEDS',
  '*',
  'BH?',
  2012-08-18T16:40:38.917900Z,
  2012-08-18T16:43:08.917900Z),
 ('UW',
  'RADR',
  '*',
  'BH?',
  2012-08-18T16:59:12.137847Z,
  2012-08-18T17:01:42.137847Z),
 ('TA',
  'I03D',
  '*',
  'BH?',
  2012-08-18T16:59:12.137847Z,
  2012-08-18T17:01:42.137847Z),
 ('7D',
  'M09B',
  '*',
  'HH?',
  2012-08-18T16:59:12.137847Z,
  2012-08-18T17:01:42.137847Z),
 ('UW',
  'WISH',
  '*',
  'BH?',
  2012-08-18T18:16:43.792100Z,
  2012-08-18T18:19:13.792100Z),
 ('TA',
  'D03D',
  '*',
  'BH?',
  2012-08-18T18:16:43.792100Z,
  2012-08-18T18:19:13.792100Z),
 ('TA',
  'K02D',
  '*',
  'BH?',
  2012-08-18T18:54:58.414744Z,
  2012-08-18T18:57:28.414744Z),
 ('UO',
  'DBO',
  '*',
  'BH?',
  2012-08-18T18:54:58.414744Z,
  2012-08-18T18:57:28.414744Z),
 ('TA',
  'K02D',
  '*',
  'BH?',
  2012-08-18T21:30:35.403768Z,
  2012-08-18T21:33:05.403768Z),
 ('7D',
  'M09B',
  '*',
  'HH?

In [180]:
test = pd.read_csv('/wd1/hbito_data/data/datasets_all_regions/metadata_HH_BH_on_the_fly_bulk.csv', index_col=0)
test['trace_name']

event_id
ev1262304917_262282        1$0,:3,:15000
ev1262305009_37536         8$0,:3,:15000
ev1262305009_37536         2$0,:3,:15000
ev1262305009_37536         7$0,:3,:15000
ev1262330283_689209       10$0,:3,:15000
                             ...        
ev1317352526_658044    10$1248,:3,:15000
ev1317352526_658044     3$1316,:3,:15000
ev1317352526_658044     7$1245,:3,:15000
ev1317352526_658044     2$1246,:3,:15000
ev1317360609_018271     5$1284,:3,:15000
Name: trace_name, Length: 13940, dtype: object