Skip to content

Commit

Permalink
data: Handle intermittent OSErrors when reading h5
Browse files Browse the repository at this point in the history
This is a workaround for randomly occurring read errors.

If no compression is used, they look like this (excerpts of the respective tracebacks):
"""
[...]
    self.id.read(mspace, fspace, arr, mtype, dxpl=self._dxpl)
  File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "h5py/h5d.pyx", line 181, in h5py.h5d.DatasetID.read
  File "h5py/_proxy.pyx", line 130, in h5py._proxy.dset_rw
  File "h5py/_proxy.pyx", line 84, in h5py._proxy.H5PY_H5Dread
OSError: Can't read data (wrong B-tree signature)
"""

Using LZF compression, they instead look like this (ZLIB similar):
"""
[...]
    self.id.read(mspace, fspace, arr, mtype, dxpl=self._dxpl)
  File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "h5py/h5d.pyx", line 181, in h5py.h5d.DatasetID.read
  File "h5py/_proxy.pyx", line 130, in h5py._proxy.dset_rw
  File "h5py/_proxy.pyx", line 84, in h5py._proxy.H5PY_H5Dread
OSError: Can't read data (Invalid data for LZF decompression)
  • Loading branch information
mdraw committed Jan 19, 2018
1 parent b080dba commit e1a55ed
Showing 1 changed file with 24 additions and 3 deletions.
27 changes: 24 additions & 3 deletions elektronn3/data/cnndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import sys
import time
import traceback
from typing import Tuple

import h5py
Expand Down Expand Up @@ -103,6 +104,7 @@ def __init__(self, input_path=None, target_path=None,
self._valid_count = None
self.n_successful_warp = 0
self.n_failed_warp = 0
self.n_read_failures = 0

self.load_data()
self._mean = mean
Expand Down Expand Up @@ -133,11 +135,10 @@ def __getitem__(self, index):
if self.grey_augment_channels is None:
self.grey_augment_channels = []
self._reseed()
input_src_coords, target_src_coords = self._getcube(self.source) # get cube randomly

input_src, target_src = self._getcube(self.source) # get cube randomly
while True:
try:
inp, target = self.warp_cut(input_src_coords, target_src_coords, self.warp, self.warp_args)
inp, target = self.warp_cut(input_src, target_src, self.warp, self.warp_args)
except transformations.WarpingOOBError:
self.n_failed_warp += 1
if self.n_failed_warp > 20 and self.n_failed_warp > 2 * self.n_successful_warp:
Expand All @@ -150,6 +151,26 @@ def __getitem__(self, index):
'Consider lowering the warping strength.'
)
continue
# TODO: Actually find out what's causing those.
except OSError:
if self.n_read_failures > self.n_successful_warp:
logger.error(
'Encountered more OSErrors than successful samples\n'
f'(Counted {self.n_read_failures} errors.)\n'
'There is probably something wrong with your HDF5 '
'files. Aborting...'
)
raise RuntimeError
self.n_read_failures += 1
traceback.print_exc()
logger.warning(
'\nUnhandled OSError while reading data from HDF5 file.\n'
f' input: {input_src.file.filename}\n'
f' target: {target_src.file.filename}\n'
'Continuing with next sample. For details, see the '
'traceback above.\n'
)
continue
self.n_successful_warp += 1
if self.normalize:
inp = (inp - self.mean) / self.std
Expand Down

0 comments on commit e1a55ed

Please sign in to comment.