# stead_convert

Convert STEAD dataset to SP dataset.

In [1]:
import random
import h5py as h5
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Modifying sys.path to be able to load project packages
import sys
sys.path.append('../')

# Load project packages
from utils.h5_tools import write_batch

In [2]:
stead_path = 'C:/data/datasets/STEAD/merge.hdf5'
stead_csv_path = 'C:/data/datasets/STEAD/merge.csv'
save_path = 'C:/data/datasets/STEAD/stead_converted.h5'

In [3]:
batch_size = 300000
target_trace_length = 400
trarget_half_length = int(target_trace_length/2)

noise_traces_per_base_trace = 2

stdout_step = 1000

P_CODE = 0
S_CODE = 1
NOISE_CODE = 2

## CSV columns

network_code,receiver_code,receiver_type,receiver_latitude,receiver_longitude,receiver_elevation_m,p_arrival_sample,

p_status,p_weight,p_travel_sec,s_arrival_sample,s_status,s_weight,source_id,source_origin_time,source_origin_uncertainty_sec,

source_latitude,source_longitude,source_error_sec,source_gap_deg,source_horizontal_uncertainty_km,source_depth_km,

source_depth_uncertainty_km,source_magnitude,source_magnitude_type,source_magnitude_author,source_mechanism_strike_dip_rake,

source_distance_deg,source_distance_km,back_azimuth_deg,snr_db,coda_end_sample,trace_start_time,trace_category,trace_name

In [4]:
def cut_from_trace(trace, start, length):
    end = start + length
    if end > trace.shape[0]:
        end = trace.shape[0]
        start = end - length
    return trace[start:end].copy()


def get_random_trace_start(length, min_dist, previous=None, max_tries=1000):
    assert max_tries > 0
    if previous is None:
        previous = []
    
    while True:
        max_tries -= 1
        start = random.randint(0, length-min_dist)
        
        passed = True
        for x in previous:
            if int(abs(x-start)) <= min_dist:
                passed = False
                break
        
        if passed:
            return start
        if max_tries == 0:
            return None


def convert_trace(dataset, trace_name, trace_category, p_arrival_sample, s_arrival_sample):
    """
    Converts STEAD trace to one or multiple SP dataset entries (P and S waves or two noise samples).
    """
    trace = dataset[trace_name]

    p_arrival_sample = int(p_arrival_sample) if not np.isnan(p_arrival_sample) else None
    s_arrival_sample = int(s_arrival_sample) if not np.isnan(s_arrival_sample) else None

    is_noise = False
    if (not p_arrival_sample and not s_arrival_sample) or trace_category == 'noise':
        is_noise = True

    X = []
    Y = []

    start_positions = []

    # Cut noise
    if is_noise:
        l = trace.shape[0]
        for _ in range(noise_traces_per_base_trace):
            start = get_random_trace_start(l, target_trace_length, start_positions)
            start_positions.append(start)
            X.append(cut_from_trace(trace, start, target_trace_length))
            Y.append(NOISE_CODE)
        return X, Y

    # Cut events
    if p_arrival_sample:
        X.append(cut_from_trace(trace, max(p_arrival_sample-trarget_half_length, 0), target_trace_length))
        Y.append(P_CODE)
    if s_arrival_sample:
        X.append(cut_from_trace(trace, max(s_arrival_sample-trarget_half_length, 0), target_trace_length))
        Y.append(S_CODE)

    return X, Y

In [5]:
dataset = h5.File(stead_path, 'r') 
dataset = dataset['data']

df = pd.read_csv(stead_csv_path)
n_traces = len(df); n_traces

  exec(code_obj, self.user_global_ns, self.user_ns)


1265657

In [6]:
random.seed(42)

batch_X = np.zeros((batch_size, target_trace_length, 3))
batch_Y = np.zeros(batch_size)

current_idx = 0
total = 0

last_mark = 1

In [7]:
for i in range(n_traces):
  
    row = df.loc[i]
    trace_name = row['trace_name']
    trace_category = row['trace_category']
    p_arrival_sample = row['p_arrival_sample']
    s_arrival_sample = row['s_arrival_sample']
    X, Y = convert_trace(dataset, trace_name, trace_category, p_arrival_sample, s_arrival_sample)
    
    for j in range(len(X)):
        batch_X[current_idx] = X[j]
        batch_Y[current_idx] = Y[j]
        current_idx += 1
        total += 1
        
        if current_idx >= batch_size:
            write_batch(save_path, 'X', batch_X)
            write_batch(save_path, 'Y', batch_Y)
            print(f'Batch written.. Total of {total} traces.')
            current_idx = 0
            
    if total > last_mark*stdout_step:
        print(f'\tConverted {total} traces.')
        last_mark += 1
            
if current_idx != 0:
    write_batch(save_path, 'X', batch_X[:current_idx])
    write_batch(save_path, 'Y', batch_Y[:current_idx])

	Converted 1002 traces.
	Converted 2002 traces.
	Converted 3002 traces.
	Converted 4002 traces.
	Converted 5002 traces.
	Converted 6002 traces.
	Converted 7002 traces.
	Converted 8002 traces.
	Converted 9002 traces.
	Converted 10002 traces.
	Converted 11002 traces.
	Converted 12002 traces.
	Converted 13002 traces.
	Converted 14002 traces.
	Converted 15002 traces.
	Converted 16002 traces.
	Converted 17002 traces.
	Converted 18002 traces.
	Converted 19002 traces.
	Converted 20002 traces.
	Converted 21002 traces.
	Converted 22002 traces.
	Converted 23002 traces.
	Converted 24002 traces.
	Converted 25002 traces.
	Converted 26002 traces.
	Converted 27002 traces.
	Converted 28002 traces.
	Converted 29002 traces.
	Converted 30002 traces.
	Converted 31002 traces.
	Converted 32002 traces.
	Converted 33002 traces.
	Converted 34002 traces.
	Converted 35002 traces.
	Converted 36002 traces.
	Converted 37002 traces.
	Converted 38002 traces.
	Converted 39002 traces.
	Converted 40002 traces.
	Converte

	Converted 319002 traces.
	Converted 320002 traces.
	Converted 321002 traces.
	Converted 322002 traces.
	Converted 323002 traces.
	Converted 324002 traces.
	Converted 325002 traces.
	Converted 326002 traces.
	Converted 327002 traces.
	Converted 328002 traces.
	Converted 329002 traces.
	Converted 330002 traces.
	Converted 331002 traces.
	Converted 332002 traces.
	Converted 333002 traces.
	Converted 334002 traces.
	Converted 335002 traces.
	Converted 336002 traces.
	Converted 337002 traces.
	Converted 338002 traces.
	Converted 339002 traces.
	Converted 340002 traces.
	Converted 341002 traces.
	Converted 342002 traces.
	Converted 343002 traces.
	Converted 344002 traces.
	Converted 345002 traces.
	Converted 346002 traces.
	Converted 347002 traces.
	Converted 348002 traces.
	Converted 349002 traces.
	Converted 350002 traces.
	Converted 351002 traces.
	Converted 352002 traces.
	Converted 353002 traces.
	Converted 354002 traces.
	Converted 355002 traces.
	Converted 356002 traces.
	Converted 3

	Converted 633002 traces.
	Converted 634002 traces.
	Converted 635002 traces.
	Converted 636002 traces.
	Converted 637002 traces.
	Converted 638002 traces.
	Converted 639002 traces.
	Converted 640002 traces.
	Converted 641002 traces.
	Converted 642002 traces.
	Converted 643002 traces.
	Converted 644002 traces.
	Converted 645002 traces.
	Converted 646002 traces.
	Converted 647002 traces.
	Converted 648002 traces.
	Converted 649002 traces.
	Converted 650002 traces.
	Converted 651002 traces.
	Converted 652002 traces.
	Converted 653002 traces.
	Converted 654002 traces.
	Converted 655002 traces.
	Converted 656002 traces.
	Converted 657002 traces.
	Converted 658002 traces.
	Converted 659002 traces.
	Converted 660002 traces.
	Converted 661002 traces.
	Converted 662002 traces.
	Converted 663002 traces.
	Converted 664002 traces.
	Converted 665002 traces.
	Converted 666002 traces.
	Converted 667002 traces.
	Converted 668002 traces.
	Converted 669002 traces.
	Converted 670002 traces.
	Converted 6

	Converted 947002 traces.
	Converted 948002 traces.
	Converted 949002 traces.
	Converted 950002 traces.
	Converted 951002 traces.
	Converted 952002 traces.
	Converted 953002 traces.
	Converted 954002 traces.
	Converted 955002 traces.
	Converted 956002 traces.
	Converted 957002 traces.
	Converted 958002 traces.
	Converted 959002 traces.
	Converted 960002 traces.
	Converted 961002 traces.
	Converted 962002 traces.
	Converted 963002 traces.
	Converted 964002 traces.
	Converted 965002 traces.
	Converted 966002 traces.
	Converted 967002 traces.
	Converted 968002 traces.
	Converted 969002 traces.
	Converted 970002 traces.
	Converted 971002 traces.
	Converted 972002 traces.
	Converted 973002 traces.
	Converted 974002 traces.
	Converted 975002 traces.
	Converted 976002 traces.
	Converted 977002 traces.
	Converted 978002 traces.
	Converted 979002 traces.
	Converted 980002 traces.
	Converted 981002 traces.
	Converted 982002 traces.
	Converted 983002 traces.
	Converted 984002 traces.
	Converted 9

	Converted 1251002 traces.
	Converted 1252002 traces.
	Converted 1253002 traces.
	Converted 1254002 traces.
	Converted 1255002 traces.
	Converted 1256002 traces.
	Converted 1257002 traces.
	Converted 1258002 traces.
	Converted 1259002 traces.
	Converted 1260002 traces.
	Converted 1261002 traces.
	Converted 1262002 traces.
	Converted 1263002 traces.
	Converted 1264002 traces.
	Converted 1265002 traces.
	Converted 1266002 traces.
	Converted 1267002 traces.
	Converted 1268002 traces.
	Converted 1269002 traces.
	Converted 1270002 traces.
	Converted 1271002 traces.
	Converted 1272002 traces.
	Converted 1273002 traces.
	Converted 1274002 traces.
	Converted 1275002 traces.
	Converted 1276002 traces.
	Converted 1277002 traces.
	Converted 1278002 traces.
	Converted 1279002 traces.
	Converted 1280002 traces.
	Converted 1281002 traces.
	Converted 1282002 traces.
	Converted 1283002 traces.
	Converted 1284002 traces.
	Converted 1285002 traces.
	Converted 1286002 traces.
	Converted 1287002 traces.
	

	Converted 1553002 traces.
	Converted 1554002 traces.
	Converted 1555002 traces.
	Converted 1556002 traces.
	Converted 1557002 traces.
	Converted 1558002 traces.
	Converted 1559002 traces.
	Converted 1560002 traces.
	Converted 1561002 traces.
	Converted 1562002 traces.
	Converted 1563002 traces.
	Converted 1564002 traces.
	Converted 1565002 traces.
	Converted 1566002 traces.
	Converted 1567002 traces.
	Converted 1568002 traces.
	Converted 1569002 traces.
	Converted 1570002 traces.
	Converted 1571002 traces.
	Converted 1572002 traces.
	Converted 1573002 traces.
	Converted 1574002 traces.
	Converted 1575002 traces.
	Converted 1576002 traces.
	Converted 1577002 traces.
	Converted 1578002 traces.
	Converted 1579002 traces.
	Converted 1580002 traces.
	Converted 1581002 traces.
	Converted 1582002 traces.
	Converted 1583002 traces.
	Converted 1584002 traces.
	Converted 1585002 traces.
	Converted 1586002 traces.
	Converted 1587002 traces.
	Converted 1588002 traces.
	Converted 1589002 traces.
	

	Converted 1855002 traces.
	Converted 1856002 traces.
	Converted 1857002 traces.
	Converted 1858002 traces.
	Converted 1859002 traces.
	Converted 1860002 traces.
	Converted 1861002 traces.
	Converted 1862002 traces.
	Converted 1863002 traces.
	Converted 1864002 traces.
	Converted 1865002 traces.
	Converted 1866002 traces.
	Converted 1867002 traces.
	Converted 1868002 traces.
	Converted 1869002 traces.
	Converted 1870002 traces.
	Converted 1871002 traces.
	Converted 1872002 traces.
	Converted 1873002 traces.
	Converted 1874002 traces.
	Converted 1875002 traces.
	Converted 1876002 traces.
	Converted 1877002 traces.
	Converted 1878002 traces.
	Converted 1879002 traces.
	Converted 1880002 traces.
	Converted 1881002 traces.
	Converted 1882002 traces.
	Converted 1883002 traces.
	Converted 1884002 traces.
	Converted 1885002 traces.
	Converted 1886002 traces.
	Converted 1887002 traces.
	Converted 1888002 traces.
	Converted 1889002 traces.
	Converted 1890002 traces.
	Converted 1891002 traces.
	

	Converted 2157002 traces.
	Converted 2158002 traces.
	Converted 2159002 traces.
	Converted 2160002 traces.
	Converted 2161002 traces.
	Converted 2162002 traces.
	Converted 2163002 traces.
	Converted 2164002 traces.
	Converted 2165002 traces.
	Converted 2166002 traces.
	Converted 2167002 traces.
	Converted 2168002 traces.
	Converted 2169002 traces.
	Converted 2170002 traces.
	Converted 2171002 traces.
	Converted 2172002 traces.
	Converted 2173002 traces.
	Converted 2174002 traces.
	Converted 2175002 traces.
	Converted 2176002 traces.
	Converted 2177002 traces.
	Converted 2178002 traces.
	Converted 2179002 traces.
	Converted 2180002 traces.
	Converted 2181002 traces.
	Converted 2182002 traces.
	Converted 2183002 traces.
	Converted 2184002 traces.
	Converted 2185002 traces.
	Converted 2186002 traces.
	Converted 2187002 traces.
	Converted 2188002 traces.
	Converted 2189002 traces.
	Converted 2190002 traces.
	Converted 2191002 traces.
	Converted 2192002 traces.
	Converted 2193002 traces.
	

	Converted 2459002 traces.
	Converted 2460002 traces.
	Converted 2461002 traces.
	Converted 2462002 traces.
	Converted 2463002 traces.
	Converted 2464002 traces.
	Converted 2465002 traces.
	Converted 2466002 traces.
	Converted 2467002 traces.
	Converted 2468002 traces.
	Converted 2469002 traces.
	Converted 2470002 traces.
	Converted 2471002 traces.
	Converted 2472002 traces.
	Converted 2473002 traces.
	Converted 2474002 traces.
	Converted 2475002 traces.
	Converted 2476002 traces.
	Converted 2477002 traces.
	Converted 2478002 traces.
	Converted 2479002 traces.
	Converted 2480002 traces.
	Converted 2481002 traces.
	Converted 2482002 traces.
	Converted 2483002 traces.
	Converted 2484002 traces.
	Converted 2485002 traces.
	Converted 2486002 traces.
	Converted 2487002 traces.
	Converted 2488002 traces.
	Converted 2489002 traces.
	Converted 2490002 traces.
	Converted 2491002 traces.
	Converted 2492002 traces.
	Converted 2493002 traces.
	Converted 2494002 traces.
	Converted 2495002 traces.
	

In [8]:
dataset.close()

AttributeError: 'Group' object has no attribute 'close'