Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ph5 validate skip empty tables and missing ph5 #481

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion ph5/core/ph5api.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,13 @@ def read_array_t(self, name):
if not self.Array_t_names:
self.read_array_t_names()
if name in self.Array_t_names:
rows, keys = self.ph5_g_sorts.read_arrays(name)
try:
rows, keys = self.ph5_g_sorts.read_arrays(name)
except TypeError as e:
if 'NoneType' in str(e):
self.Array_t[name] = {'byid': {}, 'order': [], 'keys': []}
msg = "Table %s is empty." % name
raise APIError(4, msg)
byid, order = by_id(
rows, secondary_key='channel_number_i', unique_key=False)
self.Array_t[name] = {'byid': byid, 'order': order, 'keys': keys}
Expand Down
108 changes: 79 additions & 29 deletions ph5/utilities/ph5validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class PH5Validate(object):
def __init__(self, ph5API_object, ph5path):
self.ph5 = ph5API_object
self.path = ph5path
self.miniFileNotFound = set()
if not self.ph5.Array_t_names:
self.ph5.read_array_t_names()
if not self.ph5.Experiment_t:
Expand All @@ -65,9 +66,15 @@ def __init__(self, ph5API_object, ph5path):
def read_arrays(self, name):
if name is None:
for n in self.ph5.Array_t_names:
self.ph5.read_array_t(n)
try:
self.ph5.read_array_t(n)
except ph5api.APIError as e:
LOGGER.error(e.msg)
else:
self.ph5.read_array_t(name)
try:
self.ph5.read_array_t(name)
except ph5api.APIError as e:
LOGGER.error(e.msg)

def read_events(self, name):
if name is None:
Expand Down Expand Up @@ -463,7 +470,42 @@ def check_station_completeness(self, station):
if sensor_serial is None:
warning.append("Sensor serial number is missing.")

self.ph5.read_das_t(das_serial, reread=False)
if not station['sensor/manufacturer_s']:
warning.append("Sensor manufacturer is "
"missing. Is this correct???")

if not station['sensor/model_s']:
warning.append("Sensor model is missing. "
"Is this correct???")

if not station['das/manufacturer_s']:
warning.append("DAS manufacturer is missing. "
"Is this correct???")

if not station['das/model_s']:
warning.append("DAS model is missing. "
"Is this correct???")

errmsg = ""
try:
self.ph5.read_das_t(das_serial, reread=False)
except IOError as e:
if 'does not exist' in str(e):
minifile = str(e).split("``")[1]
errmsg = str(e) + (". The file is required for Das %s."
% das_serial)
self.miniFileNotFound.add(minifile)
else:
raise e
except TypeError as e:
if "argument of type 'NoneType' is not iterable" == str(e):
errmsg = "Table Das_t_%s is empty." % das_serial
else:
raise e
if errmsg != "":
warning.append(errmsg)
return info, warning, error

sample_rate = station['sample_rate_i']
nodata_err = None
if das_serial not in self.ph5.Das_t:
Expand Down Expand Up @@ -517,9 +559,13 @@ def check_station_completeness(self, station):
check_end = das_time_list[index+1][0] - 1
i = 1
# while loop to avoid using overlapping rows
while check_end < check_start:
i += 1
check_end = das_time_list[index+i][0] - 1
try:
while check_end < check_start:
i += 1
check_end = das_time_list[index+i][0] - 1
except IndexError:
# all are overlapped
check_end = das_time_list[index][1]
try:
# clear das to make sure get_extent considers channel & sample rate
self.ph5.forget_das_t(das_serial)
Expand Down Expand Up @@ -555,22 +601,6 @@ def check_station_completeness(self, station):
"Other channels seem to exist"
.format(str(channel_id)))

if not station['sensor/manufacturer_s']:
warning.append("Sensor manufacturer is "
"missing. Is this correct???")

if not station['sensor/model_s']:
warning.append("Sensor model is missing. "
"Is this correct???")

if not station['das/manufacturer_s']:
warning.append("DAS manufacturer is missing. "
"Is this correct???")

if not station['das/model_s']:
warning.append("DAS model is missing. "
"Is this correct???")

return info, warning, error

def analyze_time(self):
Expand Down Expand Up @@ -615,9 +645,13 @@ def analyze_time(self):
dt['min_deploy_time'] = [dt['time_windows'][0][0]]
dt['max_pickup_time'] = [max([t[1] for t in dt['time_windows']])]
# look for data outside time border of each set
true_deploy, true_pickup = self.ph5.get_extent(das=d,
component=c,
sample_rate=spr)
try:
true_deploy, true_pickup = self.ph5.get_extent(das=d,
component=c,
sample_rate=spr)
except IOError as e:
dt['min_deploy_time'].append(str(e))
continue
if true_deploy is None:
# No data found, but don't give a warning here because it
# will be given in check_station_completeness
Expand Down Expand Up @@ -647,6 +681,8 @@ def check_array_t(self):
self.analyze_time()
array_names = sorted(self.ph5.Array_t_names)
for array_name in array_names:
check_dup_sta_list = []
dup_sta_list = set()
arraybyid = self.ph5.Array_t[array_name]['byid']
arrayorder = self.ph5.Array_t[array_name]['order']
for ph5_station in arrayorder:
Expand All @@ -656,6 +692,10 @@ def check_array_t(self):
for st_num in range(0, station_len):
station = station_list[deployment][st_num]
station_id = station['id_s']
if station not in check_dup_sta_list:
check_dup_sta_list.append(station)
else:
dup_sta_list.add(station_id)
channel_id = station['channel_number_i']
cha_code = (station['seed_band_code_s'] +
station['seed_instrument_code_s'] +
Expand Down Expand Up @@ -710,11 +750,12 @@ def check_array_t(self):

if info or warning or error:
header = ("-=-=-=-=-=-=-=-=-\n"
"Station {0} Channel {1}\n"
"{2} error, {3} warning, "
"{4} info\n"
"{0} Station {1} Channel {2}\n"
"{3} error, {4} warning, "
"{5} info\n"
"-=-=-=-=-=-=-=-=-\n"
.format(str(station_id),
.format(str(array_name),
str(station_id),
str(channel_id),
len(error),
len(warning),
Expand All @@ -724,6 +765,10 @@ def check_array_t(self):
warning=warning,
error=error)
validation_blocks.append(vb)
if len(dup_sta_list) > 0:
msg = ("The following stations are duplicated in %s: %s"
% (array_name, ', '.join(sorted(dup_sta_list))))
LOGGER.warning(msg)
return validation_blocks

def check_event_t_completeness(self, event):
Expand Down Expand Up @@ -930,7 +975,12 @@ def main():
for vb in validation_blocks:
vb.write_to_log(log_file,
args.level)
if len(ph5validate.miniFileNotFound) != 0:
msg = "The following files are missing: %s" % ", ".join(sorted(
ph5validate.miniFileNotFound))
LOGGER.warning(msg)
ph5API_object.close()

sys.stdout.write("\nWarnings, Errors and suggestions "
"written to logfile: %s\n" % args.outfile)
except ph5api.APIError as err:
Expand Down
26 changes: 15 additions & 11 deletions ph5/utilities/tests/test_ph5validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_check_array_t(self):
if 'Station 9001' in r.heading:
self.assertEqual(r.heading,
"-=-=-=-=-=-=-=-=-\n"
"Station 9001 Channel 1\n"
"Array_t_009 Station 9001 Channel 1\n"
"4 error, 1 warning, 0 info\n"
"-=-=-=-=-=-=-=-=-\n"
)
Expand All @@ -83,7 +83,7 @@ def test_check_array_t(self):
if 'Station 0407 Channel -2' in r.heading:
self.assertEqual(r.heading,
"-=-=-=-=-=-=-=-=-\n"
"Station 0407 Channel -2\n"
"Array_t_004 Station 0407 Channel -2\n"
"1 error, 2 warning, 0 info\n"
"-=-=-=-=-=-=-=-=-\n"
)
Expand Down Expand Up @@ -135,7 +135,8 @@ def test_main(self):
'run experiment_t_gen to create table\n')
self.assertEqual(
all_logs[3],
'Station 9001 Channel 1\n2 error, 3 warning, 0 info\n')
'Array_t_009 Station 9001 Channel 1\n'
'2 error, 3 warning, 0 info\n')
self.assertEqual(
all_logs[4],
'ERROR: No Response table found. Have you run resp_load yet?\n'
Expand All @@ -146,7 +147,7 @@ def test_main(self):
'2 time(s)\n')
self.assertEqual(
all_logs[5],
'Station 9002 Channel 1\n2 error, 2 warning, 0 info\n')
'Array_t_009 Station 9002 Channel 1\n2 error, 2 warning, 0 info\n')
self.assertEqual(
all_logs[6],
'ERROR: No Response table found. Have you run resp_load yet?\n'
Expand All @@ -155,7 +156,7 @@ def test_main(self):
'WARNING: Data exists after pickup time: 36 seconds.\n')
self.assertEqual(
all_logs[7],
'Station 9003 Channel 1\n2 error, 2 warning, 0 info\n')
'Array_t_009 Station 9003 Channel 1\n2 error, 2 warning, 0 info\n')
self.assertEqual(
all_logs[8],
'ERROR: No Response table found. Have you run resp_load yet?\n'
Expand All @@ -178,21 +179,21 @@ def test_main(self):
'run experiment_t_gen to create table\n')
self.assertEqual(
all_logs[3],
'Station 9001 Channel 1\n2 error, 3 warning, 0 info\n')
'Array_t_009 Station 9001 Channel 1\n2 error, 3 warning, 0 info\n')
self.assertEqual(
all_logs[4],
'ERROR: No Response table found. Have you run resp_load yet?\n'
'ERROR: Response_t has no entry for n_i=7\n')
self.assertEqual(
all_logs[5],
'Station 9002 Channel 1\n2 error, 2 warning, 0 info\n')
'Array_t_009 Station 9002 Channel 1\n2 error, 2 warning, 0 info\n')
self.assertEqual(
all_logs[6],
'ERROR: No Response table found. Have you run resp_load yet?\n'
'ERROR: Response_t has no entry for n_i=7\n')
self.assertEqual(
all_logs[7],
'Station 9003 Channel 1\n2 error, 2 warning, 0 info\n')
'Array_t_009 Station 9003 Channel 1\n2 error, 2 warning, 0 info\n')
self.assertEqual(
all_logs[8],
'ERROR: No Response table found. Have you run resp_load yet?\n'
Expand Down Expand Up @@ -265,7 +266,8 @@ def test_check_array_t(self):
)

self.assertEqual(vb[0].heading,
'-=-=-=-=-=-=-=-=-\nStation 9001 Channel 1\n'
'-=-=-=-=-=-=-=-=-\n'
'Array_t_009 Station 9001 Channel 1\n'
'2 error, 3 warning, 0 info\n-=-=-=-=-=-=-=-=-\n')
self.assertEqual(vb[0].info, [])
self.assertEqual(
Expand All @@ -280,7 +282,8 @@ def test_check_array_t(self):
)

self.assertEqual(vb[1].heading,
'-=-=-=-=-=-=-=-=-\nStation 9002 Channel 1\n'
'-=-=-=-=-=-=-=-=-\n'
'Array_t_009 Station 9002 Channel 1\n'
'2 error, 2 warning, 0 info\n-=-=-=-=-=-=-=-=-\n')
self.assertEqual(vb[1].info, [])
self.assertEqual(
Expand All @@ -294,7 +297,8 @@ def test_check_array_t(self):
)

self.assertEqual(vb[2].heading,
'-=-=-=-=-=-=-=-=-\nStation 9003 Channel 1\n'
'-=-=-=-=-=-=-=-=-\n'
'Array_t_009 Station 9003 Channel 1\n'
'2 error, 2 warning, 0 info\n-=-=-=-=-=-=-=-=-\n')
self.assertEqual(vb[2].info, [])
self.assertEqual(
Expand Down