Skip to content

Commit

Permalink
add scripts for plotting various statistics of CNN-verified automated…
Browse files Browse the repository at this point in the history
… picks
  • Loading branch information
Richard Taylor committed Oct 29, 2020
1 parent 40a41fd commit ebc0b39
Show file tree
Hide file tree
Showing 10 changed files with 636 additions and 3 deletions.
12 changes: 12 additions & 0 deletions seismic/ml_classifier/autopick_mseed_plotter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from obspy.core import stream
from pathlib import Path
import os

mseed_dir = '/g/data/ha3/rlt118/neural-datasets/autopicks-mseed/'

pathlist = Path(mseed_dir).glob('**/*.mseed')

for path in pathlist:
s = stream.read(str(path))
ppath = os.path.splitext(str(path))[0]+'.png'
s.plot(outfile=ppath)
133 changes: 133 additions & 0 deletions seismic/ml_classifier/data_harvester/autopick_highsnr_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#script to collect waveforms corresponding to the automated S-wave picks and save them with an index to a directory
import sys
sys.path.append('/g/data1a/ha3/rlt118/hiperseis/')
from seismic.ASDFdatabase.FederatedASDFDataSet import FederatedASDFDataSet
from autopicks_approx import pickLoader
from obspy.core import UTCDateTime, Stream

fds=FederatedASDFDataSet('/g/data/ha3/Passive/SHARED_DATA/Index/asdf_files.txt', logger=None)


import numpy as np
import re

#global method to get a waveform from a pick
def getStream(pick,fds,mintime):
if pick[0][0]=='#': #this line is commented
return None
net=pick[6]
st=pick[7]
ch=pick[8]
time=UTCDateTime(float(pick[9]))
if mintime:
if time < mintime:
return None

starttime=time-20
endtime=time+40



if ch=='00T':

baz=float(pick[14])
#get all the horizontal short-period channels at loc '00' or '' (assuming the 00 in 00T refers to
#loc)
try:

inv=fds.get_stations(starttime,endtime,network=net,station=st)
#find appropriate channels in the inventory (ASDF returns list, not inventory type)
for chan in inv:
if re.search('^[HBS].[N1]$',chan[3]):
break
if not re.search('^[HBS].[N1]$',chan[3]):
raise Exception('no appropriate horizontal channels found')

stream=fds.get_waveforms(net,st,chan[2],chan[3],starttime,endtime)
wf1=stream[0]
wf2=fds.get_waveforms(net,st,chan[2],chan[3][0:-1]+'E',starttime,endtime)[0]
#this is a hack and wrong. Hopefully 1 and 2 are only off from N and E by a few degrees max
wf1.id=wf1.id[:-1]+'N'
wf2.id=wf2.id[:-1]+'E'
stream=Stream(traces=[wf1,wf2])
except Exception as e:
#handle data missing
print >> sys.stderr, e
stream=None

wf=None
if stream:
try:
stream=stream.rotate(method='NE->RT',back_azimuth=baz)
for trywf in stream:
if trywf.stats['channel'][-1]=='T':
wf=trywf
break
except Exception as e:
print >>sys.stderr, e
wf=None

else:
try:
#just grab the channel that the pick was on
inv=fds.get_stations(starttime,endtime,network=net,station=st,channel=ch)
loc=inv[0][2]
wf=fds.get_waveforms(net,st,loc,ch,starttime,endtime)[0]
except Exception as e:
print >> sys.stderr, e
wf=None
if wf and len(wf)>100:
wf.resample(100)
wf.detrend()
wf.normalize()
return wf
else:
print >> sys.stderr, 'Waveform not found. Skipping pick...'
return None



loadDir='/g/data/ha3/rlt118/neural-datasets/autopicks/'
saveDir='/g/data/ha3/rlt118/highSNR-autopicks/'
with open(loadDir+'ensemble.s.txt','r') as f:
picks=f.readlines()
pickCtr=len(picks)-1

with open(loadDir+'ensemble.s.verified.txt', 'r') as f:
vpicks = f.readlines()
vpicks = vpicks[1:]

vpdict = {}
for pick in vpicks:
plist = pick.split()
snr = float(plist[17])
if snr > 20:
evID = plist[0]
net = plist[6]
st = plist[7]
ch = plist[8]
vpdict[evID+net+st+ch] = 1


outfile = saveDir + "ensemble.s.unverified.txt"
with open(outfile, "w") as f:
for pick in picks:
plist = pick.split()
cwt = float(plist[18])
snr = float(plist[17])
if snr > 20:
evID = plist[0]
net=plist[6]
st=plist[7]
ch=plist[8]
if not (evID+net+st+ch in vpdict):
s = getStream(plist, fds, None)
if s:
#save mseed
cleanevID = re.sub('[^A-Za-z0-9]+', '', evID)
s.write(saveDir+cleanevID+net+st+ch+".mseed", format="MSEED")
#write pick to out file
f.write(pick+'\n')



46 changes: 46 additions & 0 deletions seismic/ml_classifier/data_harvester/autopick_hist_SNR.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import matplotlib.pyplot as plt
import numpy as np

loadDir='/g/data/ha3/rlt118/neural-datasets/autopicks/'

with open(loadDir+'ensemble.s.verified.txt', 'r') as f:
vpicks = f.readlines()
vpicks = vpicks[1:]

with open(loadDir+'ensemble.s.txt', 'r') as f:
picks = f.readlines()


snrs = []
slopes = []
for pick in picks:
plist = pick.split()
snr = float(plist[17])
slope = float(plist[20])
snrs.append(snr)
slopes.append(slope)
snrs = np.array(snrs)
slopes = np.array(slopes)


vsnrs = []
for vpick in vpicks:
vplist = vpick.split()
snr = float(vplist[17])
vsnrs.append(snr)
vsnrs = np.array(vsnrs)

n1, bins1, a = plt.hist(snrs, bins = 20, range=(0,30), color="blue", edgecolor = "black")
n2, bins2, a = plt.hist(vsnrs, bins = 20, range=(0,30), color = "green", edgecolor = "black")
plt.xlabel("SNR")
plt.ylabel("Number")
plt.show()
bwidth = bins1[1] - bins1[0]
barcent = bins1 + bwidth/2
barcent = barcent[0:-1]
plt.bar(barcent,n2/n1,width=bwidth, color = "blue", edgecolor = "black")
plt.xlabel("SNR")
plt.ylabel("Acceptance ratio")
plt.show()
# plt.scatter(snrs,slopes)
# plt.show()
133 changes: 133 additions & 0 deletions seismic/ml_classifier/data_harvester/autopick_hq_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#script to collect waveforms corresponding to the automated S-wave picks and save them with an index to a directory
import sys
sys.path.append('/g/data1a/ha3/rlt118/hiperseis/')
from seismic.ASDFdatabase.FederatedASDFDataSet import FederatedASDFDataSet
from autopicks_approx import pickLoader
from obspy.core import UTCDateTime, Stream

fds=FederatedASDFDataSet('/g/data/ha3/Passive/SHARED_DATA/Index/asdf_files.txt', logger=None)


import numpy as np
import re

#global method to get a waveform from a pick
def getStream(pick,fds,mintime):
if pick[0][0]=='#': #this line is commented
return None
net=pick[6]
st=pick[7]
ch=pick[8]
time=UTCDateTime(float(pick[9]))
if mintime:
if time < mintime:
return None

starttime=time-20
endtime=time+40



if ch=='00T':

baz=float(pick[14])
#get all the horizontal short-period channels at loc '00' or '' (assuming the 00 in 00T refers to
#loc)
try:

inv=fds.get_stations(starttime,endtime,network=net,station=st)
#find appropriate channels in the inventory (ASDF returns list, not inventory type)
for chan in inv:
if re.search('^[HBS].[N1]$',chan[3]):
break
if not re.search('^[HBS].[N1]$',chan[3]):
raise Exception('no appropriate horizontal channels found')

stream=fds.get_waveforms(net,st,chan[2],chan[3],starttime,endtime)
wf1=stream[0]
wf2=fds.get_waveforms(net,st,chan[2],chan[3][0:-1]+'E',starttime,endtime)[0]
#this is a hack and wrong. Hopefully 1 and 2 are only off from N and E by a few degrees max
wf1.id=wf1.id[:-1]+'N'
wf2.id=wf2.id[:-1]+'E'
stream=Stream(traces=[wf1,wf2])
except Exception as e:
#handle data missing
print >> sys.stderr, e
stream=None

wf=None
if stream:
try:
stream=stream.rotate(method='NE->RT',back_azimuth=baz)
for trywf in stream:
if trywf.stats['channel'][-1]=='T':
wf=trywf
break
except Exception as e:
print >>sys.stderr, e
wf=None

else:
try:
#just grab the channel that the pick was on
inv=fds.get_stations(starttime,endtime,network=net,station=st,channel=ch)
loc=inv[0][2]
wf=fds.get_waveforms(net,st,loc,ch,starttime,endtime)[0]
except Exception as e:
print >> sys.stderr, e
wf=None
if wf and len(wf)>100:
wf.resample(100)
wf.detrend()
wf.normalize()
return wf
else:
print >> sys.stderr, 'Waveform not found. Skipping pick...'
return None



loadDir='/g/data/ha3/rlt118/neural-datasets/autopicks/'
saveDir='/g/data/ha3/rlt118/highslope-autopicks'
with open(loadDir+'ensemble.s.txt','r') as f:
picks=f.readlines()
pickCtr=len(picks)-1

with open(loadDir+'ensemble.s.verified.txt', 'r') as f:
vpicks = f.readlines()
vpicks = vpicks[1:]

vpdict = {}
for pick in vpicks:
plist = pick.split()
slope = float(plist[20])
if slope > 10:
evID = plist[0]
net = plist[6]
st = plist[7]
ch = plist[8]
vpdict[evID+net+st+ch] = 1


outfile = saveDir + "ensemble.s.unverified.txt"
with open(outfile, "w") as f:
for pick in picks:
plist = pick.split()
cwt = float(plist[18])
slope = float(plist[20])
if slope > 10:
evID = plist[0]
net=plist[6]
st=plist[7]
ch=plist[8]
if not(evID+net+st+ch in vpdict):
s = getStream(plist, fds, None)
if s:
#save mseed
cleanevID = re.sub('[^A-Za-z0-9]+', '', evID)
s.write(saveDir+cleanevID+net+st+ch+".mseed", format="MSEED")
#write pick to out file
f.write(pick+'\n')



0 comments on commit ebc0b39

Please sign in to comment.