Code to create HDF5 (.h5) files from the templates, for cases where they were not saved correctly during make_templates

Created December 7, 2022

Imports and parameters

In [None]:
import re #to parse the fields encoded in the template filename

import eqcorrscan #the package for templates
import h5py #for writing to h5
import numpy as np #for some math in stream data
import obspy #import obspy to work with streams, etc.
import pandas as pd #to work with csv
from eqcorrscan import Tribe #import the ability to read .tgz files aka Tribe files

In [None]:
#run parameters, hard-coded for now - WILL BE UPDATED TO READ CONFIG

#input: directory holding the Tribe archive, and the archive's name
#(later cells parse the volcano, network, station and channel out of this name)
path = '/home/smocz/expand_redpy_new_files/templates/'
filename = 'Volcano_Rainier_Network_UW_Station_RCM_Channel_HHZ.tgz'

#output: base directory that the metadata csv and the h5 file are written under
savepath = '/data/wsd03/redpy_template_h5/'

#NOTE(review): nbucket is not read by any cell visible in this notebook section - confirm it is still needed
nbucket = 1

In [None]:
#load the volcano/station metadata table; it supplies the station coordinates used later
volc_md = pd.read_csv('/home/smocz/expand_redpy/csv_catalogs/Volcano_Metadata.csv')

#add a "NET.STA" key column so a station can be looked up by its network and station code together
network_codes = volc_md['Network'].astype(str)
station_codes = volc_md['Station'].astype(str)
volc_md['netsta'] = network_codes.str.cat(station_codes, sep='.')

Retrieve templates (see reading_templates for more details)

In [None]:
#load every template stored in the .tgz Tribe archive at path + filename
T = Tribe().read(path + filename)

Setup

In [None]:
#Allocate the waveform array `data` and the empty metadata table `meta`

#one row per template, one column per sample of the template's first trace
#(the first template sets the width: data length is the same for all templates)
n_templates = len(T)
n_samples = len(T[0].st[0].data)
data = np.zeros((n_templates, n_samples))

#metadata column names, in the order they will appear in the saved csv
meta_columns = [
    "source_id", "source_origin_time", "source_latitude_deg", "source_longitude_deg", "source_type",
    "source_depth_km", "split", "source_magnitude", "station_network_code", "trace_channel",
    "station_code", "station_location_code", "station_latitude_deg", "station_longitude_deg",
    "station_elevation_m", "trace_name", "trace_sampling_rate_hz", "trace_start_time",
    "trace_S_arrival_sample", "trace_P_arrival_sample", "CODE",
]
meta = pd.DataFrame(columns=meta_columns)

Fill in data and save

In [None]:
#####################################
#metadata shared by every template  #
#####################################

#The filename encodes volcano, network, station and channel as
#   Volcano_<volcano>_Network_<net>_Station_<sta>_Channel_<chan>.tgz
#Matching on the literal field labels keeps multi-word volcano names (e.g. St_Helens) in one piece;
#splitting on '_' alone shifts every later field by one for such names.
name_match = re.fullmatch(
    r'Volcano_(?P<volc>.+)_Network_(?P<net>[^_]+)_Station_(?P<sta>[^_]+)_Channel_(?P<chan>[^_.]+)(?:\..*)?',
    filename)
if name_match is None:
    raise ValueError(f'filename does not follow the Volcano_*_Network_*_Station_*_Channel_* pattern: {filename}')
volc = name_match['volc'] #volcano name
net = name_match['net'] #network
sta = name_match['sta'] #station
chan = name_match['chan'] #channel (anything after the first "." is ignored)

#number of trailing characters of the template name that hold the clusterID (account for zfill)
cl_id_widths = {'Baker': 3, 'Hood': 3, 'Newberry': 3, 'Rainier': 3, 'St_Helens': 4}
if volc not in cl_id_widths:
    raise ValueError(f'no clusterID width known for volcano {volc!r}; expected one of {sorted(cl_id_widths)}')
cl_id_width = cl_id_widths[volc]

#latitude and longitude, lat lon: one lookup of the station row, with a clear error if it is missing
station_rows = volc_md[volc_md['netsta']==f'{net}.{sta}']
if station_rows.empty:
    raise ValueError(f'station {net}.{sta} not found in the volcano metadata')
lat = station_rows['Latitude'].iloc[0]
lon = station_rows['Longitude'].iloc[0]

############################
#gather metadata and data  #
############################

#collect one record per template and build the DataFrame once afterwards
#(DataFrame.append was removed in pandas 2.0, and re-running the cell no longer duplicates rows)
records = []
for tt,t in enumerate(T): #for each template in the tgz file
    print(tt)

    #source_id/clusterID, cl_id: the zero-filled ID at the end of the template name
    cl_id = t.name[-cl_id_width:]

    print(f'template name: {t.name}, source_id: {cl_id}, network: {net}, station: {sta}, channel: {chan}, latitude: {lat}, longitude: {lon}')
    print('----------')

    #keys must match the column names of meta exactly (note the capital P in trace_P_arrival_sample)
    records.append({"source_id": cl_id, "source_origin_time": '',
        "source_latitude_deg": "%.3f" % 0, "source_longitude_deg": "%.3f" % 0,
        "source_type": 'unknown',
        "source_depth_km": "%.3f" % 0, "source_magnitude": 0,
        "station_network_code": net, "trace_channel": chan,
        "station_code": sta, "station_location_code": '',
        "station_latitude_deg": lat,  "station_longitude_deg": lon,
        "station_elevation_m": 0,
        "trace_P_arrival_sample": 0, "CODE": t.name})

    #store the samples of the template's first trace (the array itself, not the Trace object)
    data[tt] = t.st[0].data

#rebuild meta from the records, keeping the column order declared when meta was created;
#columns with no value in the records are left empty (NaN)
meta = pd.DataFrame(records, columns=meta.columns)

#output files are named after the .tgz file, without its extension
stem = filename.split('.')[0]

#save meta to csv
meta.to_csv(f"{savepath}meta_csv/{stem}.csv",sep = ',', index=False)

#write to h5 (the with-block closes the file on exit)
with h5py.File(f"{savepath}h5/{stem}.hdf5",'w') as f:
    f['/data'] = data

In [None]:
#sanity check: show the metadata table, then the first and last waveform rows
display(meta)
first_row, last_row = data[0], data[-1]
print(first_row, '---', last_row, sep='\n')