Skip to content

Commit

Permalink
Added consistency check for numpy data arrays to ensure they match ch…
Browse files Browse the repository at this point in the history
…annels when writing.
  • Loading branch information
ianneilmacleod committed Oct 19, 2017
1 parent 2996abc commit ed1acfb
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 21 deletions.
34 changes: 28 additions & 6 deletions geosoft/gxpy/gdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -1090,6 +1090,8 @@ def new_channel(self, name, dtype=np.float64, array=1, dup=None, details=None):
"""

symb = self._db.find_symb(name, gxapi.DB_SYMB_CHAN)
if array < 1:
array = 1
if symb == gxapi.NULLSYMB:
if dup:
symb = self._db.dup_symb_no_lock(self.channel_name_symb(dup)[1], name)
Expand Down Expand Up @@ -1694,10 +1696,14 @@ def write_channel(self, line, channel, data, fid=(0.0, 1.0), unit_of_measure=Non
if not isinstance(data, np.ndarray):
data = np.array(data)

if _va_width(data) == 0:
# no data to write
return

w = self.channel_width(cs)
if w != _va_width(data):
raise GdbException(
_t("Array data width {} does not fit into VA channel '{}' with width {}").
_t("Array data width {} does not fit into channel '{}' with width {}").
format(_va_width(data), cn, w))

# 1D channel
Expand Down Expand Up @@ -1757,7 +1763,8 @@ def write_line(self, line, data, channels=None, fid=(0.0, 1.0)):
:param line: line to write to, name or symbol
:param data: numpy array shape (records,channels). If single dimension, one channel
:param channels: channel name or symbol list, or a single name/symbol. If a single name is specified
for multi-column data, a VA channel is assumed.
for multi-column data, a VA channel is assumed. If None, a sorted list of all channels
is assumed.
:param fid: option fid tuple (start, increment), default (0.0,1.0)
.. versionadded:: 9.1
Expand All @@ -1767,9 +1774,28 @@ def write_line(self, line, data, channels=None, fid=(0.0, 1.0)):
self.write_channel(line, channels, data, fid=fid)

else:

if channels is None:
channels = self._sorted_chan_list()

if data.ndim == 1:
data = data.reshape((-1, 1))

# ensure data matches channels
np_data = 0
for chan in channels:
try:
ch, cs = self.channel_name_symb(chan)
w = self.channel_width(cs)
except GdbException:
w = 1
np_data += w

# channel - data mismatch
if data.shape[1] != np_data:
raise GdbException(_t('Data dimension ({}) does not match data required by channels ({}).').format(data.shape, channels))

# all good, write the data
np_index = 0
for chan in channels:
try:
Expand All @@ -1781,10 +1807,6 @@ def write_line(self, line, data, channels=None, fid=(0.0, 1.0)):
self.write_channel(line, cs, data[:, np_index: np_index + w], fid=fid)
np_index += w

# error if there is any data left
if np_index - data.shape[1] != 0:
raise GdbException(_t('More data than channels, but data up to channels was written out.'))

def list_values(self, chan, max=1000, selected=True, dupl=50, progress=None, stop=None):
"""
Build a list of unique values in a channel. Uniqueness depends on the current display format for
Expand Down
43 changes: 28 additions & 15 deletions geosoft/gxpy/tests/test_gdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,8 @@ def test_newline_GDB(self):
pass

ch = ['a','b','c','d','e']
gdb.write_line('testline',npd,channels=ch)
self.assertRaises(gxdb.GdbException, gdb.write_line, 'testline', npd, ['xx', 'yy'])
gdb.write_line('testline', npd, channels=ch)
npd2, ch2, fid2 = gdb.read_line('testline',channels=ch)
self.assertEqual(npd.shape,npd2.shape)
self.assertEqual(ch2, ch)
Expand Down Expand Up @@ -1185,32 +1186,44 @@ def test_dup(self):
finally:
gdb.discard()

@unittest.skip('This test shows throws error now, skipping to let fixture pass')
def test_large_va(self):
@unittest.skip('skipping to let fixture pass')
def test_large_stress(self):
self.start()

try:
name = None
with gxdb.Geosoft_gdb.new('new', overwrite=True) as gdb:
with gxdb.Geosoft_gdb.new('new', overwrite=True, comp=gxdb.COMP_NONE, pageSize=64) as gdb:
name = gdb.file_name
npd = np.empty((2000000, 3)) # TODO - jacques, this works - see next one
npd[:, :] = np.nan
npd = np.zeros(1000000) #TODO, this is 8 meg of data. This should not fit in 4 meg.
line = gdb.new_line('test')
gxdb.Channel.new(gdb, 'xx', array=3)
gdb.write_line(line, npd, ['xx[0]', 'xx[1]', 'xx[2]'])
gdb.write_line(line, npd, ['xx'])
npd2, ch, fid = gdb.read_line(line)
self.assertEqual(len(ch), 3, npd.shape)
self.assertEqual(len(ch), 1)

with gxdb.Geosoft_gdb.new('new', overwrite=True, comp=gxdb.COMP_NONE, pageSize=64) as gdb:
name = gdb.file_name
npd = np.zeros(4000000) #TODO, this is 32 meg of data, reported as 4.11 meg in the error?
line = gdb.new_line('test')
gdb.write_line(line, npd, ['xx'])
npd2, ch, fid = gdb.read_line(line)
self.assertEqual(len(ch), 1)

finally:
gxdb.delete_files(name)

@unittest.skip('skipping to let fixture pass')
def test_very_large_stress(self):
self.start()

try:
name = None
with gxdb.Geosoft_gdb.new('new', overwrite=True) as gdb:
with gxdb.Geosoft_gdb.new('new', overwrite=True, comp=gxdb.COMP_NONE, pageSize=64) as gdb:
name = gdb.file_name
npd = np.empty((20000000, 3)) # TODO - this bigger one fails, but not nicely (see above, which works), same as problem reported in forum
npd[:, :] = np.nan
npd = np.zeros(1000000000) # TODO, this should NOT work. On one test I got an indexing error
line = gdb.new_line('test')
gxdb.Channel.new(gdb, 'xx', array=3) #TODO failure is when we attempt to write. Symbols are OK, but core reports invalid symbol (-1)
gdb.write_line(line, npd, ['xx[0]', 'xx[1]', 'xx[2]'])
gdb.write_line(line, npd, ['xx'])
npd2, ch, fid = gdb.read_line(line)
self.assertEqual(len(ch), 3, npd.shape)
self.assertEqual(len(ch), 1)

finally:
gxdb.delete_files(name)
Expand Down

0 comments on commit ed1acfb

Please sign in to comment.