Skip to content

Commit

Permalink
Merge pull request #355 from hkchekc/df_compat
Browse files Browse the repository at this point in the history
Dataframe compatibility test
  • Loading branch information
achilleas-k committed Dec 13, 2018
2 parents 864cbc3 + cb1cb9c commit b2064da
Show file tree
Hide file tree
Showing 7 changed files with 177 additions and 28 deletions.
29 changes: 22 additions & 7 deletions nixio/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from inspect import isclass
from six import string_types
from collections import OrderedDict # using it for python2.7
import sys

from .util import find as finders
from .compression import Compression
Expand Down Expand Up @@ -190,7 +191,15 @@ def create_data_array(self, name, array_type, dtype=None, shape=None,
return da

def create_data_frame(self, name, type_, col_dict=None, col_names=None,
col_dtypes=None, data=None, compression=Compression.No):
col_dtypes=None, data=None,
compression=Compression.No):

if isinstance(col_dict, dict) and not \
isinstance(col_dict, OrderedDict) and sys.version_info[0] < 3:
raise TypeError("Python 2 users should use name_list "
"or OrderedDict created with LIST and TUPLES"
" to create DataFrames as the order "
"of the columns cannot be maintained in Py2")

if data is not None:
shape = len(data)
Expand All @@ -201,14 +210,17 @@ def create_data_frame(self, name, type_, col_dict=None, col_names=None,
if col_dict is None:
if col_names is not None:
if col_dtypes is not None:
col_dict = OrderedDict((str(nam), dt) for nam, dt in zip(col_names, col_dtypes))
col_dict = OrderedDict((str(nam), dt)
for nam, dt in zip(col_names, col_dtypes))
elif col_dtypes is None and data is not None:
col_dtypes = []
for x in data[0]:
col_dtypes.append(type(x))
col_dict = OrderedDict((str(nam), dt) for nam, dt in zip(col_names, col_dtypes))
col_dict = OrderedDict((str(nam), dt)
for nam, dt in zip(col_names, col_dtypes))
else: # col_dtypes is None and data is None
raise (ValueError, "The data type of each column have to be specified")
raise (ValueError,
"The data type of each column have to be specified")
else: # if col_names is None
if data is not None and type(data[0]) == np.void:
col_dtype = data[0].dtype
Expand All @@ -220,12 +232,15 @@ def create_data_frame(self, name, type_, col_dict=None, col_names=None,
raw_dt_list = [ele[0] for ele in raw_dt]
col_dict = OrderedDict(zip(cn, raw_dt_list))

else: # data is None or type(data[0]) != np.void /data_type doesnt matter
raise (ValueError, "No information about column names is provided!")
else:
# data is None or type(data[0]) != np.void /data_type doesnt matter
raise (ValueError,
"No information about column names is provided!")

if col_dict is not None:
for nam, dt in col_dict.items():
if isclass(dt) and any(issubclass(dt, st) for st in string_types):
if isclass(dt) and any(issubclass(dt, st)
for st in string_types):
col_dict[nam] = util.vlen_str_dtype
dt_arr = list(col_dict.items())
col_dtype = np.dtype(dt_arr)
Expand Down
1 change: 0 additions & 1 deletion nixio/data_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,4 +230,3 @@ def df_shape(self):
df_shape = tuple(df_shape)
self._h5group.set_attr("df_shape", df_shape)
return self._h5group.get_attr("df_shape")

62 changes: 42 additions & 20 deletions nixio/test/test_data_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
from six import string_types
from collections import OrderedDict
import sys


class TestDataFrame(unittest.TestCase):
Expand All @@ -15,24 +16,34 @@ def setUp(self):
self.testfilename = os.path.join(self.tmpdir.path, "dataframetest.nix")
self.file = nix.File.open(self.testfilename, nix.FileMode.Overwrite)
self.block = self.file.create_block("test block", "recordingsession")
di = OrderedDict([('name', np.int64), ('id', str), ('time', float), ('sig1', np.float64), ('sig2', np.int32)])
arr = [(1, "a", 20.18, 5.0, 100), (2, 'b', 20.09, 5.5, 101), (2, 'c', 20.05, 5.1, 100)
,(1, "d", 20.15, 5.3, 150), (2, 'e', 20.23, 5.7, 200), (2, 'f', 20.07, 5.2, 300), (1, "g", 20.12, 5.1, 39),
(1, "h", 20.27, 5.1, 600), (2, 'i', 20.15, 5.6, 400), (2, 'j', 20.08, 5.1, 200)]
di = OrderedDict([('name', np.int64), ('id', str), ('time', float),
('sig1', np.float64), ('sig2', np.int32)])
arr = [(1, "a", 20.18, 5.0, 100), (2, 'b', 20.09, 5.5, 101),
(2, 'c', 20.05, 5.1, 100), (1, "d", 20.15, 5.3, 150),
(2, 'e', 20.23, 5.7, 200), (2, 'f', 20.07, 5.2, 300),
(1, "g", 20.12, 5.1, 39), (1, "h", 20.27, 5.1, 600),
(2, 'i', 20.15, 5.6, 400), (2, 'j', 20.08, 5.1, 200)]
other_arr = np.arange(11101, 11200).reshape((33, 3))
other_di = OrderedDict({'name': np.int64, 'id': int, 'time': float})
self.df1 = self.block.create_data_frame("test df", "signal1",
data=arr, col_dict=di)
self.df2 = self.block.create_data_frame("other df", "signal2",
data=arr, col_dict=di)
self.df3 = self.block.create_data_frame("reference df", "signal3",
data=other_arr, col_dict=other_di)
data=other_arr, col_dict=other_di)
self.dtype = self.df1._h5group.group["data"].dtype

def tearDown(self):
self.file.close()
self.tmpdir.cleanup()

def create_with_list(self):
    """Create a DataFrame from parallel lists of column names and dtypes.

    Helper, not a test case: it has no ``test_`` prefix, so the unittest
    loader does not collect it.

    The frame is created on ``self.block``, the block that ``setUp``
    opens; the previous ``self.blk`` attribute is never assigned in this
    class and raised ``AttributeError`` on every call.

    Returns:
        Whatever ``self.block.create_data_frame`` returns for the
        3-column, 333-row input built here.
    """
    arr = np.arange(999).reshape((333, 3))
    namelist = np.array(['name', 'id', 'time'])
    dtlist = np.array([int, str, float])
    # Hand the result back to the caller instead of binding it to an
    # unused local.
    return self.block.create_data_frame('test1', 'for_test',
                                        col_names=namelist,
                                        col_dtypes=dtlist, data=arr)

def test_data_frame_eq(self):
assert self.df1 == self.df1
assert not self.df1 == self.df2
Expand All @@ -41,11 +52,12 @@ def test_data_frame_eq(self):
assert self.df2 is not None

def test_create_with_list(self):
arr = [(1, 'a', 20.18, 5.1, 100), (2, 'b', 20.09, 5.5, 101), (2, 'c', 20.05, 5.1, 100)]
arr = [(1, 'a', 20.18, 5.1, 100), (2, 'b', 20.09, 5.5, 101),
(2, 'c', 20.05, 5.1, 100)]
namelist = np.array(['name', 'id', 'time', 'sig1', 'sig2'])
dtlist = np.array([np.int64, str, float, np.float64, np.int32])
df_li = self.block.create_data_frame("test_list", "make_of_list", data=arr,
col_names=namelist, col_dtypes=dtlist)
df_li = self.block.create_data_frame("test_list", "make_of_list",
data=arr, col_names=namelist, col_dtypes=dtlist)
assert df_li.column_names == self.df1.column_names
assert df_li.dtype == self.df1.dtype
for i in df_li[:]:
Expand All @@ -60,15 +72,15 @@ def test_data_frame_type(self):
def test_write_row(self):
# test write single row
row = ["1", 'abc', 3, 4.4556356242341, 5.1111111]
self.assertAlmostEqual(list(self.df1[9]),
[2, 'j', 20.08, 5.1, 200])
self.assertAlmostEqual(list(self.df1[9]), [2, 'j', 20.08, 5.1, 200])
self.df1.write_rows([row], [9])
assert list(self.df1[9]) == [1, 'abc', 3., 4.4556356242341, 5]
self.assertIsInstance(self.df1[9]['name'], np.integer)
self.assertIsInstance(self.df1[9]['sig2'], np.int32)
assert self.df1[9]['sig2'] == int(5)
# test write multiple rows
multi_rows = [[1775, '12355', 1777, 1778, 1779], [1785, '12355', 1787, 1788, 1789]]
multi_rows = [[1775, '12355', 1777, 1778, 1779],
[1785, '12355', 1787, 1788, 1789]]
self.df1.write_rows(multi_rows, [1, 2])
assert list(self.df1[1]) == [1775, '12355', 1777, 1778, 1779]
assert list(self.df1[2]) == [1785, '12355', 1787, 1788, 1789]
Expand Down Expand Up @@ -113,7 +125,8 @@ def test_read_cell(self):
assert crcell == 'j'
# test error raise if only one param given
self.assertRaises(ValueError, lambda: self.df1.read_cell(row_idx=10))
self.assertRaises(ValueError, lambda: self.df1.read_cell(col_name='sig1'))
self.assertRaises(ValueError,
lambda: self.df1.read_cell(col_name='sig1'))

def test_write_cell(self):
# write cell by position
Expand All @@ -123,21 +136,25 @@ def test_write_cell(self):
self.df1.write_cell('test', col_name='id', row_idx=3)
assert self.df1[3]['id'] == 'test'
# test error raise
self.assertRaises(ValueError, lambda: self.df1.write_cell(11, col_name='sig1'))
self.assertRaises(ValueError,
lambda: self.df1.write_cell(11, col_name='sig1'))

def test_append_column(self):
y = np.arange(start=16000, stop=16010, step=1)
self.df1.append_column(y, name='trial_col', datatype=int)
assert self.df1.column_names == ('name', 'id', 'time', 'sig1', 'sig2', 'trial_col')
assert self.df1.column_names == \
('name', 'id', 'time', 'sig1', 'sig2', 'trial_col')
assert len(self.df1.dtype) == 6
k = np.array(self.df1[0:10]["trial_col"], dtype=np.int64)
np.testing.assert_almost_equal(k, y)
# too short column
sh_col = np.arange(start=16000, stop=16100, step=1)
self.assertRaises(ValueError, lambda: self.df1.append_column(sh_col, name='sh_col'))
self.assertRaises(ValueError, lambda:
self.df1.append_column(sh_col, name='sh_col'))
# too long column
long = np.arange(start=16000, stop=16500, step=1)
self.assertRaises(ValueError, lambda: self.df1.append_column(long, name='long'))
self.assertRaises(ValueError, lambda:
self.df1.append_column(long, name='long'))

def test_append_rows(self):
# append single row
Expand All @@ -147,25 +164,30 @@ def test_append_rows(self):
# append multi-rows
mrows = [[1, '2', 3, 4, 5], [6, 'testing', 8, 9, 10]]
self.df1.append_rows(mrows)
assert [list(i) for i in self.df1[-2:]] == [[1, '2', 3., 4., 5], [6, 'testing', 8., 9., 10]]
assert [list(i) for i in self.df1[-2:]] == \
[[1, '2', 3., 4., 5], [6, 'testing', 8., 9., 10]]
# append row with incorrect length
errrow = [5, 6, 7, 8]
self.assertRaises(ValueError, lambda: self.df1.append_rows([errrow]))

def test_unit(self):
assert self.df1.units is None
self.df1.units = ["s", 'A', 'ms', 'Hz', 'mA']
np.testing.assert_array_equal(self.df1.units, np.array(["s", 'A', 'ms', 'Hz', 'mA']))
np.testing.assert_array_equal(self.df1.units,
np.array(["s", 'A', 'ms', 'Hz', 'mA']))
assert self.df2.units is None

def test_df_shape(self):
assert tuple(self.df1.df_shape) == (10, 5)
# create df with incorrect dimension to see if Error is raised
arr = np.arange(1000).reshape(10, 10, 10)
self.assertRaises(ValueError, lambda:
self.block.create_data_frame('err', 'err', {'name': np.int64}, data=arr))
if sys.version_info[0] == 3:
self.assertRaises(ValueError,
lambda: self.block.create_data_frame('err', 'err',
{'name': np.int64}, data=arr))

def test_data_type(self):
assert self.df1.dtype[4] == np.int32
assert self.df1.dtype[0] != self.df1.dtype[4]
assert self.df1.dtype[2] == self.df1.dtype[3]

47 changes: 47 additions & 0 deletions nixio/test/test_nix_compatibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import numpy as np
import tempfile
import pytest
from collections import OrderedDict

import nixio as nix
from .xcompat.compile import maketests
Expand Down Expand Up @@ -125,6 +126,30 @@ def _test_data_arrays(tmpdir):
# validate(nixfilepath)


def _test_data_frames(tmpdir):
    """Write ``frametest.nix`` with one block, one group and 7 data frames.

    Each data frame ``df_<idx>`` is created from a 333x3 integer array
    with the columns ``name`` (int), ``id`` (str) and ``time`` (float),
    then given a definition, a random creation time and a label.

    Args:
        tmpdir: directory (anything accepted by ``str()``) in which the
            file is created.
    """
    nixfilepath = os.path.join(str(tmpdir), "frametest.nix")
    nix_file = nix.File.open(nixfilepath, mode=nix.FileMode.Overwrite)
    print(nixfilepath, nix_file)
    blk = nix_file.create_block("testblock", "blocktype")
    # The group is created for its presence in the file only; nothing is
    # attached to it here.
    blk.create_group("testgroup", "grouptype")

    for idx in range(7):
        # An ordered mapping keeps the column order fixed; a plain dict
        # does not guarantee it on older Python versions.
        di = OrderedDict([('name', int), ('id', str), ('time', float)])
        arr = np.arange(999).reshape((333, 3))
        df = blk.create_data_frame("df_" + str(idx), "dataframe", col_dict=di,
                                   data=arr)
        df.definition = "da definition " + str(idx)
        df.force_created_at(np.random.randint(1000000000))
        df.label = "data label " + str(idx)

    nix_file.close()
    # validate(nixfilepath)


def _test_tags(tmpdir):
nixfilepath = os.path.join(str(tmpdir), "tagtest.nix")
nix_file = nix.File.open(nixfilepath, mode=nix.FileMode.Overwrite)
Expand Down Expand Up @@ -480,6 +505,13 @@ def test_full_file(tmpdir):
group = block.groups[0]
group.data_arrays.append(da)

df = block.create_data_frame("adataframe", "4-column df",
col_dict=OrderedDict([('name', str), ('id',
int), ('time', float), ('Adjusted', bool)]),
data=[["Bob", 9, 11.28, False],
["Jane", 10, 14.37, True]])
df.append_rows([["Alice", 2, 3.7, False]])

featda = block.create_data_array("feat-da", "tag-feature",
data=[0.4, 0.41, 0.49, 0.1, 0.1, 0.1])

Expand Down Expand Up @@ -709,6 +741,21 @@ def test_full_file_read(tmpdir):
compare(nix.DimensionType.Set, dim.dimension_type)
compare(["a", "b"], dim.labels)

# Data Frame
df = block.data_frames[0]
compare("table", df.name)
compare("filing", df.type)
dt = (nix.util.util.vlen_str_dtype,
nix.DataType.Double, nix.DataType.Int64, nix.DataType.Bool)
compare(dt, df.dtype)
col_name = ("str", "Double", "int64", "bool")
compare(col_name, df.column_names)
combine_dt = np.dtype([(n, dty) for n, dty in zip(col_name, dt)])
arr = np.array([(b"exp1", 42.1, 10, False),
(b"exp2", 30.2, 4, True)], dtype=combine_dt)
compare(arr, df[:])
# could not test shape because it will write data

# Tag
tag = block.tags[0]
compare("tagu", tag.name)
Expand Down
22 changes: 22 additions & 0 deletions nixio/test/xcompat/readdataframes.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#include "testutil.hpp"
#include <nix.hpp>

// Opens the NIX file given as the single command-line argument read-only,
// checks name, type and definition of every block against the expected
// values, and returns the accumulated result of the compare() calls as the
// exit status (1 when the argument count is wrong).
//
// NOTE(review): the expected strings ("test_block<i>", "blocktype",
// "definition data_frame <i>") must match whatever the writer side of this
// compatibility test produces -- confirm against the writer.
int main(int argc, char* argv[]) {
    const bool have_single_path = (argc == 2);
    if (!have_single_path) {
        std::cerr << "Please specify a nix file (and nothing else)" << std::endl;
        return 1;
    }

    const std::string fname = argv[1];
    nix::File nf = nix::File::open(fname, nix::FileMode::ReadOnly);

    int errcount = 0;
    int idx = 0;
    for (const auto &block : nf.blocks()) {
        // Both expected strings carry the index of the current block.
        const auto suffix = nix::util::numToStr(idx);
        std::string expname = "test_block" + suffix;
        std::string expdef = "definition data_frame " + suffix;

        errcount += compare(expname, block.name());
        errcount += compare("blocktype", block.type());
        errcount += compare(expdef, block.definition());

        ++idx;
    }
    return errcount;
}
26 changes: 26 additions & 0 deletions nixio/test/xcompat/readfullfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,32 @@ int main(int argc, char* argv[]) {
setdim = dim;
errcount += compare({"a", "b"}, setdim.labels());

// DataFrame
auto df = block.getDataFrame(0);
size_t n = 3;
std::vector<float_t> dou_out(n);
std::vector<std::string> str_out(n);
errcount += compare("adataframe", df.name());
errcount += compare("4-column df", df.type());
errcount += compare(nix::ndsize_t{n}, df.rows());
errcount += compare(df.readRow(nix::ndsize_t{0}), {nix::Variant("Bob"),
nix::Variant(int64_t(9)), nix::Variant(11.28), nix::Variant(false)});
df.readColumn(0, str_out);
errcount += compare(str_out, {"Bob", "Jane", "Alice"});
df.readColumn(2, dou_out);
errcount += compare(dou_out, {11.28, 14.37, 3.7});
nix::ndsize_t rown = 1;
std::vector<nix::Cell> cells = df.readCells(rown, {"name", "Adjusted"});
std::vector<nix::Cell> def_cells = {
{"name", nix::Variant{"Jane"}},
{"Adjusted", nix::Variant{true}}
};
errcount += compare(cells, def_cells);
std::vector<nix::Column> cols = df.columns();
for(size_t i = 0; i < cols.size(); i++){
errcount += compare("", cols[i].unit);
}

// Tag
auto tag = block.getTag(0);
errcount += compare("tagu", tag.name());
Expand Down
18 changes: 18 additions & 0 deletions nixio/test/xcompat/writefullfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,22 @@ int main(int argc, char* argv[]) {
group = block.getGroup(0);
group.addDataArray(da);

// Data Frame
std::vector<nix::Column> cols = {{"str", "", nix::DataType::String}
,{"Double", "A", nix::DataType::Double},
{"int64", "ms", nix::DataType::Int64},
{"bool", "", nix::DataType::Bool}};
auto df = block.createDataFrame("table", "filing", cols);
std::vector<nix::Variant> vals = {nix::Variant("exp1"),
nix::Variant(42.1), nix::Variant(10), nix::Variant(false)};
df.rows(1);
df.writeRow(0, vals);
group.addDataFrame(df);
df.rows(2);
vals = {nix::Variant("exp2"),
nix::Variant(30.2), nix::Variant(4), nix::Variant(true)};
df.writeRow(1, vals);

datadbl = {0.4, 0.41, 0.49, 0.1, 0.1, 0.1};
auto featda = block.createDataArray("feat-da", "tag-feature", nix::DataType::Double, nix::NDSize{6});
featda.setData(nix::DataType::Double, datadbl.data(), nix::NDSize{6}, nix::NDSize{0});
Expand All @@ -77,6 +93,8 @@ int main(int argc, char* argv[]) {
group.addTag(tag);
tag.createFeature(featda, nix::LinkType::Untagged);



auto mtag = block.createMultiTag("mtagu", "multi tagging", block.createDataArray("tag-data", "multi-tagger", nix::DataType::Double, nix::NDSize{1, 3}));
datadbl = {0, 0.1, 10.1};
// MultiTag positions array
Expand Down

0 comments on commit b2064da

Please sign in to comment.