Code to make h5 files of the templates in the case that they are not saved correctly during make_templates

Created December 7, 2022

Imports and parameters

In [10]:
import pandas as pd #to work with csv
import numpy as np #for some math in stream data
import eqcorrscan #the package for templates
from eqcorrscan import Tribe #import the ability to read .tgz files aka Tribe files
import obspy #import obspy to work with streams, etc.

In [16]:
#set parameters - WILL BE UPDATED TO READ CONFIG
path = '/home/smocz/expand_redpy_new_files/templates/' #path to .tgz file
filename = 'Volcano_Rainier_Network_UW_Station_RCM_Channel_HHZ.tgz' #name of .tgz file
nbucket = 1
savepath = '/data/wsd03/redpy_template_h5/' #path to save to

In [3]:
#read in volcano metadata for station location
volc_md = pd.read_csv('/home/smocz/expand_redpy/csv_catalogs/Volcano_Metadata.csv')
#make associated netsta column
volc_md['netsta'] = volc_md['Network'].astype(str)+'.'+volc_md['Station'].astype(str)

Setup

In [4]:
#define update_data
def update_data(data, streamdata, ibucket):
    streamdata = np.expand_dims(streamdata, axis = 0)

    if ibucket in data:
        data[ibucket] = np.concatenate((data[ibucket], streamdata), axis = 0)
    else:
        data[ibucket] = streamdata
    return data
#Create Dictionary data and DataFrame meta
data = {}
meta = pd.DataFrame(columns = [
    "source_id", "source_origin_time", "source_latitude_deg", "source_longitude_deg", "source_type",
    "source_depth_km", "split", "source_magnitude", "station_network_code", "trace_channel", 
    "station_code", "station_location_code", "station_latitude_deg",  "station_longitude_deg",
    "station_elevation_m", "trace_name", "trace_sampling_rate_hz", "trace_start_time",
    "trace_S_arrival_sample", "trace_P_arrival_sample", "CODE"])

Retreive templates (see reading_templates for more details)

In [5]:
T = Tribe().read(f'{path}{filename}') #read the .tgz file

Fill in data and save

In [14]:
for t in T: #for each template in the tgz file
    
    ############################
    #gather and append metadata#
    ############################
    
    #source_id/clusterID, cl_id:
    volc = filename.split('_')[1] #get volcano name from filename
    if volc=='Baker' or volc=='Hood' or volc=='Newberry' or volc=='Rainier': #account for zfill
        cl_id = t.name[-3:] #record clusterID
    if filename.partition('_')[1]=='St_Helens':#account for zfill
        cl_id = t.name[-4:] #record clusterID
    
    #network, net:
    net = filename.split('_')[3] #get network from filename
    
    #station, sta:
    sta = filename.split('_')[5] #get station from filename
    
    #channel, chan:
    chan = filename.split('_')[-1].split('.')[0] #get channel from filename, use second split to ignore ".tgz"
    
    #latitude and longitude, lat lon:
    lat = volc_md[volc_md['netsta']==f'{net}.{sta}']['Latitude'].values.tolist()[0]
    lon = volc_md[volc_md['netsta']==f'{net}.{sta}']['Longitude'].values.tolist()[0]

    print(f'template name: {t.name}, source_id: {cl_id}, network: {net}, station: {sta}, channel: {chan}, latitude: {lat}, longitude: {lon}')
    print('----------')
    
    meta = meta.append({"source_id": cl_id, "source_origin_time": '', 
        "source_latitude_deg": "%.3f" % 0, "source_longitude_deg": "%.3f" % 0, 
        "source_type": 'unknown',
        "source_depth_km": "%.3f" % 0, "source_magnitude": 0,
        "station_network_code": net, "trace_channel": chan, 
        "station_code": sta, "station_location_code": '',
        "station_latitude_deg": lat,  "station_longitude_deg": lon,
        "station_elevation_m": 0,
        "trace_p_arrival_sample": 0, "CODE": t.name}, ignore_index = True)
    
    ########################
    #gather and append data#
    ########################
    
    ibucket = np.random.choice(list(np.arange(nbucket) + 1))
    data = update_data(data, t.st[0], ibucket) #t.st[0] is the template stream
    print('ibucket: ',ibucket)
    print(data[ibucket])
    
#     break


#save meta to csv
meta.to_csv(f"{savepath}meta_csv/{filename.split('.')[0]}.csv",sep = ',', index=False)

#write to h5

f = h5py.File(f"{savepath}h5/{filename.split('.')[0]}.hdf5",'a') #appending mode
    #If the file does not exist, it creates a new file for writing.
# need to define f in order to close it in order to open it in mode w
if f: f.close()
f = h5py.File(f"{savepath}h5/{filename.split('.')[0]}.hdf5",'w') #writing mode
#         f['/data_format/component_order'] ='ZNE' #dunno what this does
print(range(nbucket))
for b in range(nbucket):
    f['/data/bucket%d' % (b + 1)] = data[b + 1]
f.close()

template name: rcmhhzrpra000, source_id: 000, network: UW, station: RCM, channel: HHZ, latitude: 46.83564, longitude: -121.732979
----------
ibucket:  1
[[ 3.06273407  2.54875182  0.67549879 ... -1.34218141 -1.5543479
  -1.1136041 ]
 [ 3.06273407  2.54875182  0.67549879 ... -1.34218141 -1.5543479
  -1.1136041 ]]


  meta = meta.append({"source_id": cl_id, "source_origin_time": '',


# code below here is just for reference

In [20]:
#display meta to check on it
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
#     display(meta)

display(meta)

Unnamed: 0,source_id,source_origin_time,source_latitude_deg,source_longitude_deg,source_type,source_depth_km,split,source_magnitude,station_network_code,trace_channel,...,station_latitude_deg,station_longitude_deg,station_elevation_m,trace_name,trace_sampling_rate_hz,trace_start_time,trace_S_arrival_sample,trace_P_arrival_sample,CODE,trace_p_arrival_sample
0,000,,0.000,0.000,unknown,0.000,,0,UW,HHZ,...,46.83564,-121.732979,0,,,,,,rcmhhzrpra000,0.0
1,001,,0.000,0.000,unknown,0.000,,0,UW,HHZ,...,46.83564,-121.732979,0,,,,,,rcmhhzrpra001,0.0
2,002,,0.000,0.000,unknown,0.000,,0,UW,HHZ,...,46.83564,-121.732979,0,,,,,,rcmhhzrpra002,0.0
3,003,,0.000,0.000,unknown,0.000,,0,UW,HHZ,...,46.83564,-121.732979,0,,,,,,rcmhhzrpra003,0.0
4,004,,0.000,0.000,unknown,0.000,,0,UW,HHZ,...,46.83564,-121.732979,0,,,,,,rcmhhzrpra004,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
366,409,,0.000,0.000,unknown,0.000,,0,UW,HHZ,...,46.83564,-121.732979,0,,,,,,rcmhhzrpra409,0.0
367,000,,0.000,0.000,unknown,0.000,,0,UW,HHZ,...,46.83564,-121.732979,0,,,,,,rcmhhzrpra000,0.0
368,000,,0.000,0.000,unknown,0.000,,0,UW,HHZ,...,46.83564,-121.732979,0,,,,,,rcmhhzrpra000,0.0
369,000,,0.000,0.000,unknown,0.000,,0,UW,HHZ,...,46.83564,-121.732979,0,,,,,,rcmhhzrpra000,0.0


From make_templates:

In [None]:
# fill in data
ibucket = np.random.choice(list(np.arange(nbucket) + 1))
data = update_data(data, st3[-1], ibucket) #st3 is the template stream
print('ibucket: ',ibucket,data[ibucket])

In [None]:
# saving the file (and a csv)
meta.to_csv("/data/whd02/Data_rp/metadata_"+volc_list_names[vv]+"_"+net+"_"+sta+".csv",sep = ',', index=False)
f = h5py.File("/data/whd02/Data_rp/waveforms_"+volc_list_names[vv]+"_"+net+"_"+sta+".hdf5",'a') #appending mode
    #If the file does not exist, it creates a new file for writing.
# need to define f in order to close it in order to open it in mode w
if f: f.close()
f = h5py.File("/data/whd02/Data_rp/waveforms_"+volc_list_names[vv]+"_"+net+"_"+sta+".hdf5",'w') #writing mode
#         f['/data_format/component_order'] ='ZNE' #dunno what this does
print(range(nbucket))
for b in range(nbucket):
    f['/data/bucket%d' % (b + 1)] = data[b + 1]
f.close()