Fix issue #323: remove the comment='#' argument from the pd.read_csv call (line 1079) in the _build_section_df function, and add a specific test to check the behavior
ISA-tools · Nov 23, 2018 · 6067b73 · 6067b73
1 parent 817a7ab
commit 6067b73
Show file tree

Hide file tree

Showing 2 changed files with 17 additions and 3 deletions.
diff --git a/isatools/isatab.py b/isatools/isatab.py
@@ -1076,8 +1076,8 @@ def _read_tab_section(f, sec_key, next_sec_key=None):
         return memf
 
     def _build_section_df(f):
-        df = pd.read_csv(f, names=range(0, 128), sep='\t', engine='python', encoding='utf-8',
-                         comment='#').dropna(axis=1, how='all')
+        df = pd.read_csv(f, names=range(0, 128), sep='\t', engine='python', encoding='utf-8').dropna(axis=1, how='all') #,comment='#' should fix issue323 reported by Ken, even though could require deeper investigation. only lines starting with '#' ought to be ignored
+
         df = df.T
         df.replace(np.nan, '', regex=True, inplace=True)  # Strip out the nan entries
         df.reset_index(inplace=True)  # Reset index so it is accessible as column

diff --git a/tests/test_isatab.py b/tests/test_isatab.py
@@ -17,12 +17,13 @@
 
 def setUpModule():
     if not os.path.exists(utils.DATA_DIR):
-        raise FileNotFoundError("Could not fine test data directory in {0}. Ensure you have cloned the ISAdatasets "
+        raise FileNotFoundError("Could not find test data directory in {0}. Ensure you have cloned the ISAdatasets "
                                 "repository using "
                                 "git clone -b tests --single-branch git@github.com:ISA-tools/ISAdatasets {0}"
                                 .format(utils.DATA_DIR))
 
 
+
 class TestIsaMerge(unittest.TestCase):
 
     def setUp(self):
@@ -329,6 +330,17 @@ def setUp(self):
     def tearDown(self):
         shutil.rmtree(self._tmp_dir)
 
+    def test_isatab_load_issue323(self):
+        with open(os.path.join(self._tab_data_dir, 'issue323', 'i_05.txt')) as fp:
+            ISA = isatab.load(fp)
+            print(ISA.studies[0].protocols[0].description)
+            self.assertEqual(len(ISA.studies[0].protocols[0].description), 70)
+
+        protocol = Protocol(description="some description containing a # character that should not be picked up", name="", protocol_type=OntologyAnnotation(term=""))
+        print("test protocol description", protocol.description)
+
+        self.assertEqual(len(protocol.description),70)
+
     def test_isatab_load_issue200(self):
         with open(os.path.join(self._tab_data_dir, 'issue200', 'i_Investigation.txt')) as fp:
             ISA = isatab.load(fp)
@@ -463,6 +475,8 @@ def test_isatab_load_bii_s_7(self):
             self.assertEqual(len(assay_gx.process_sequence), 116)  # 116 processes in in a_matteo-assay-Gx.txt
 
 
+
+
 class UnitTestIsaTabDump(unittest.TestCase):
     def setUp(self):
         self._tab_data_dir = utils.TAB_DATA_DIR