In [2]:
import pickle
import random
import h5py
import soundfile as sf
import numpy as np
import os

In [None]:
# Export settings: source HDF5 archive, destination folder, and the sample
# rate handed to soundfile for every file written below.
HDF5_PATH = "../data/merge.hdf5"
OUTPUT_DIR = "../earthquake_wavs"
FS = 100

os.makedirs(OUTPUT_DIR, exist_ok=True)

with h5py.File(HDF5_PATH, 'r') as f:
    # Materialise every name under the "data" group so the total can be
    # reported and the first few names sliced off.
    all_keys = list(f['data'].keys())
    print(f"Total samples: {len(all_keys)}")

    # Only the first five entries are written out.
    for key in all_keys[:5]:
        waveform = f['data'][key][()]
        print(f"Saving {key} -> shape {waveform.shape}, dtype {waveform.dtype}")

        wav_path = os.path.join(OUTPUT_DIR, f"{key}.wav")
        sf.write(wav_path, waveform, FS, subtype='FLOAT')

print(f"Saved {min(5, len(all_keys))} .wav files to {OUTPUT_DIR}")


Total samples: 1265657
Saving 109C.TA_20060723155859_EV -> shape (6000, 3), dtype float32
Saving 109C.TA_20061103155652_EV -> shape (6000, 3), dtype float32
Saving 109C.TA_20061103161223_EV -> shape (6000, 3), dtype float32
Saving 109C.TA_20061114133221_EV -> shape (6000, 3), dtype float32
Saving 109C.TA_20061127104640_EV -> shape (6000, 3), dtype float32
Saved 5 .wav files to ../earthquake_wavs


In [11]:
import pickle
import h5py
import os
import soundfile as sf

# Export settings for the noise samples: HDF5 archive, pickled key -> label
# mapping, destination folder, and the sample rate passed to soundfile.
HDF5_PATH = "../data/merge.hdf5"
LABEL_PATH = "../preprocessed/key_to_label.pkl"
OUTPUT_DIR = "../noise_wavs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

FS = 100


# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load label files that were produced by a trusted preprocessing run.
with open(LABEL_PATH, "rb") as f:
    key_to_label = pickle.load(f)


with h5py.File(HDF5_PATH, "r") as f:
    count = 0
    for key, label in key_to_label.items():
        if label == 0:  # Noise sample
            waveform = f["data"][key][()]
            # Store 32-bit float samples, as the other export cells do.
            # Without an explicit subtype soundfile falls back to the WAV
            # default (16-bit PCM), which clips float data outside [-1, 1].
            sf.write(
                os.path.join(OUTPUT_DIR, f"{key}.wav"),
                waveform,
                FS,
                subtype='FLOAT'
            )
            count += 1
            # Stop after six noise files have been written.
            if count >= 6:
                break

print(f"Saved {count} noise .wav files in {OUTPUT_DIR}")


Saved 6 noise .wav files in ../noise_wavs


In [3]:
# Restore the two pickled lookup tables used by the cells below.
# key_to_label is indexed by sample key; station_to_keys is indexed by station.
with open("../preprocessed/key_to_label.pkl", "rb") as label_file:
    key_to_label = pickle.load(label_file)

with open("../preprocessed/station_to_keys.pkl", "rb") as station_file:
    station_to_keys = pickle.load(station_file)

In [6]:
# Path of the HDF5 archive opened below.
file_path = "../data/merge.hdf5"

# Open read-only. Nothing in this cell closes the file, so `hdf5_file` stays
# open for as long as `data_group` is used; call hdf5_file.close() when done.
hdf5_file = h5py.File(file_path, mode='r')
data_group = hdf5_file['data']

In [14]:
# Load the saved array of noise keys and show it as the cell result.
noise_keys = np.load(file="../preprocessed/noise_keys.npy")
noise_keys

array(['ANON.AV_20180116000618_NO', 'ARK1.7F_20100726193148_NO',
       'ANPB.AV_20180115192448_NO', ..., 'AUCH.AV_20180115215930_NO',
       'AC02.C1_201511091043_NO', 'AC04.C1_201505021244_NO'], dtype='<U25')

In [3]:
# Split by station rather than by sample: shuffle the station names with a
# fixed seed, then cut the list into 80% train, 10% validation and the
# remainder as test.
random.seed(40)
stations = list(station_to_keys)
random.shuffle(stations)

n_total = len(stations)
n_train = int(0.8 * n_total)
n_val = int(0.1 * n_total)

# Index where the validation slice ends and the test slice begins.
val_end = n_train + n_val
train_stations, val_stations, test_stations = (
    stations[:n_train],
    stations[n_train:val_end],
    stations[val_end:],
)

In [4]:
def collect_samples(station_list):
    """Gather (key, label) pairs for every sample of the given stations.

    Args:
        station_list: iterable of station names, each looked up in the
            module-level ``station_to_keys`` mapping.

    Returns:
        Object-dtype NumPy array of shape ``(n_samples, 2)``: column 0 holds
        the sample key and column 1 holds ``key_to_label[key]``. Rows follow
        the order of ``station_list`` and, within a station, the order of its
        keys. When there are no samples the shape is ``(0, 2)``.

    Raises:
        KeyError: propagated from the lookups when a station or key is
            missing (assuming both mappings are plain dicts).
    """
    pairs = [
        (key, key_to_label[key])
        for st in station_list
        for key in station_to_keys[st]
    ]
    # np.array([]) would be 1-D with shape (0,); reshaping keeps the result
    # two-dimensional even when no samples were collected, and is a no-op
    # for non-empty input.
    return np.array(pairs, dtype=object).reshape(-1, 2)

# Build the (key, label) arrays for each station split.
train_samples, val_samples, test_samples = (
    collect_samples(train_stations),
    collect_samples(val_stations),
    collect_samples(test_stations),
)

# Report the size of each split, one shape per line.
for split_samples in (train_samples, val_samples, test_samples):
    print(split_samples.shape)

(55266, 2)
(7420, 2)
(7314, 2)


In [5]:
def get_sample_keys(samples):
    """Return the first element of every sample, in order, as a list.

    Args:
        samples: iterable of indexable items (e.g. ``(key, label)`` rows).

    Returns:
        List containing ``sample[0]`` for each item of ``samples``.
    """
    return [entry[0] for entry in samples]

# First element (the sample key) of every row in test_samples.
test_keys = get_sample_keys(test_samples)

In [6]:
# Export settings: source HDF5 archive, destination folder, and the sample
# rate handed to soundfile for every file written below.
HDF5_PATH = "../data/merge.hdf5"
OUTPUT_DIR = "../test_wavs"
FS = 100

os.makedirs(OUTPUT_DIR, exist_ok=True)

with h5py.File(HDF5_PATH, 'r') as f:
    all_keys = list(f['data'].keys())
    print(f"Total samples: {len(all_keys)}")

    # Stays 0 when test_keys is empty; otherwise tracks the files written.
    count = 0
    # The first seven test keys are exported (the same cut-off as stopping
    # once the running count exceeds 6).
    for count, key in enumerate(test_keys[:7], start=1):
        waveform = f['data'][key][()]
        print(f"Saving {key} -> shape {waveform.shape}, dtype {waveform.dtype}")

        wav_path = os.path.join(OUTPUT_DIR, f"{key}.wav")
        sf.write(wav_path, waveform, FS, subtype='FLOAT')

print(f"Saved {count} .wav files to {OUTPUT_DIR}")

Total samples: 1265657
Saving ACTO.PO_200909062326_NO -> shape (6000, 3), dtype float32
Saving ACTO.PO_201104201236_NO -> shape (6000, 3), dtype float32
Saving ACTO.PO_201010190456_NO -> shape (6000, 3), dtype float32
Saving ACTO.PO_201307140923_NO -> shape (6000, 3), dtype float32
Saving ACTO.PO_201101161500_NO -> shape (6000, 3), dtype float32
Saving ACTO.PO_201303091104_NO -> shape (6000, 3), dtype float32
Saving ACTO.PO_201005200512_NO -> shape (6000, 3), dtype float32
Saved 7 .wav files to ../test_wavs


In [9]:
# Print the stored label of each key that was exported to ../test_wavs.
for checked_key in (
    'ACTO.PO_200909062326_NO',
    'ACTO.PO_201104201236_NO',
    'ACTO.PO_201010190456_NO',
    'ACTO.PO_201307140923_NO',
    'ACTO.PO_201101161500_NO',
    'ACTO.PO_201303091104_NO',
    'ACTO.PO_201005200512_NO',
):
    print(key_to_label[checked_key])

0
0
0
0
0
0
0
