Skip to content

Commit

Permalink
Added utility operation for getting unannotated columns
Browse files Browse the repository at this point in the history
  • Loading branch information
WardLT committed Nov 1, 2018
1 parent 363b3eb commit 4e12125
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 0 deletions.
5 changes: 5 additions & 0 deletions dlhub_sdk/models/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ def annotate_column(self, column_name, description=None, data_type=None, units=N
column['units'] = units
return self

def get_unannotated_columns(self):
    """List the names of columns that do not yet have a description

    Returns:
        ([string]) Name of every column whose metadata has no
            "description" entry, in the order the columns are stored
    """

    unannotated = []
    for column in self["dataset"]["columns"]:
        # Only the presence of the key matters, not the value stored under it
        if "description" in column:
            continue
        unannotated.append(column["name"])
    return unannotated

def _get_column(self, column_name):
"""Gets the metadata for a certain column
Expand Down
10 changes: 10 additions & 0 deletions dlhub_sdk/models/tests/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,16 @@ def test_serialize(self):
# Make sure the fields are the same
self.assertEqual(metadata._output, metadata_copy._output)

def test_unlabeled(self):
    """Columns are reported as unannotated until they are given a description"""
    test_dir = os.path.dirname(__file__)
    csv_path = os.path.abspath(os.path.join(test_dir, 'test.csv'))
    model = TabularDataset.create_model(csv_path)

    # A freshly-created model should report every column as unlabeled
    unlabeled = model.get_unannotated_columns()
    self.assertEqual(["x", "y"], unlabeled)

    # After describing "x", only "y" should remain in the list
    model.annotate_column("x", "description")
    unlabeled = model.get_unannotated_columns()
    self.assertEqual(["y"], unlabeled)

# Run the test cases in this module when the file is executed directly
if __name__ == "__main__":
unittest.main()

0 comments on commit 4e12125

Please sign in to comment.