In [1]:
import numpy as np
import torch
from pathlib import Path
from obspy import UTCDateTime, read_inventory, Inventory, read
from tqdm import tqdm
from geokernels.distance import geodist

# contains preprocessed waveforms (output of prep.py)
waveform_dir = "waveforms_prep/"
# contains station metadata (output of download.py)
station_dir = "stations/"

# landslide coordinates from Google Earth
loc_reference = -26.9621, 72.8087

event_time = UTCDateTime("2023-09-16T12:35:00.0Z")
window_length = 24 * 3600

In [2]:
# list of stations for which prepped waveforms are available
stations_prepped = [
    x.name.removesuffix(".mseed") for x in Path(waveform_dir).glob("*.mseed")
]

# parse inventory for only those
inv = read_inventory(f"{station_dir}/*")
prepped_inv = Inventory()
for sta in sorted(stations_prepped):
    prepped_inv += inv.select(
        network=f"{sta.split('.')[0]}",
        station=f"{sta.split('.')[1]}",
    )

# extract array coordinates, and remember order of coordinates
coordinates = []
coordinates_stations = []
for net in prepped_inv:
    for sta in net:
        # skip duplicates
        if f"{net.code}.{sta.code}" in coordinates_stations:
            continue
        coordinates.append([sta.longitude, sta.latitude])
        coordinates_stations.append(f"{net.code}.{sta.code}")
coordinates = torch.tensor(np.array(coordinates))

print(f"coordinates available for {len(coordinates)} stations")

# make sure to load data in order of coordinates_stations
waveforms = torch.zeros(len(coordinates_stations), int(window_length * 0.1))
used_coords = []
for sta_idx, sta in tqdm(enumerate(coordinates_stations)):
    st = read(f"{waveform_dir}/{sta}.mseed")
    # should not fail, because prep.py produces single-trace seismograms for each station
    tr = st.select(channel="*Z")[0]
    tr.trim(
        starttime=event_time - 3600,
        endtime=event_time + window_length,
        pad=True,
        fill_value=0,
    )

    waveforms[sta_idx, :] = torch.tensor(
        tr.data[: int(window_length * tr.stats.sampling_rate)]
    )

print(
    f"waveforms loaded for {waveforms.shape[1]} stations in {waveforms.shape[0]} time windows"
)

coordinates available for 1488 stations


1488it [00:58, 25.62it/s]

waveforms loaded for 1440 stations in 1488 time windows





In [3]:
# expand loc_reference to same shape as coordinates
loc_reference_e = torch.tensor(loc_reference).flip(0).expand(coordinates.shape)
distances_to_reference = torch.tensor(
    geodist(coordinates.flip(1), loc_reference_e, metric="km")
)
# sort wavefr
distances_to_reference_sort, sort_idx = torch.sort(distances_to_reference)
waveforms_sort = waveforms[sort_idx, :]
coordinates_sort = coordinates[sort_idx, :]
coordinates_stations_sort = np.array(coordinates_stations)[sort_idx]
# distances_to_reference_sort = distances_to_reference[sort_idx]

In [4]:
# save to use in plot_fig4B.py
torch.save(
    {
        "waveforms": waveforms_sort,
        "coordinates": coordinates_sort,
        "station_names": coordinates_stations_sort,
        "distances_to_reference": distances_to_reference_sort,
    },
    "waveforms_24h.pt",
)