DOI-USGS · jlaura · Aug 26, 2018 · Aug 26, 2018 · Aug 26, 2018
diff --git a/plio/io/io_bae.py b/plio/io/io_bae.py
@@ -7,53 +7,66 @@
 import numpy as np
 import pandas as pd
 
-def socetset_keywords_to_json(keywords, ell=None):
+from plio.utils.utils import is_number, convert_string_to_float
+
+def socetset_keywords_to_dict(keywords, ell=None):
     """
-    Convert a SocetCet keywords.list file to JSON
+    Parse Socet Set keywords.list content into a dictionary
 
     Parameters
     ----------
     keywords : str
-               Path to the socetset keywords.list file
+               Path to the socetset keywords.list file or a raw string that
+               will be split on newlines and parsed.
+
+    ell : str
+          Optional path to the ellipsoid keywords.list file or a raw string
+          that will be split on newlines and parsed
 
     Returns
     -------
-     : str
-       The serialized JSON string.
+     data : dict
+            The parsed keywords; a key followed by exactly one value maps
+            to a scalar, every other key maps to a list of values.
+
+    Raises
+    ------
+    ValueError
+        If a key is repeated or values appear before any key.
     """
-    matcher = re.compile(r'\b(?!\d)\w+\b')
-    numeric_matcher = re.compile(r'\W?-?(?:0|[1-9]\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)?')
-    stream = {}
-
-    def parse(fi):
-        with open(fi, 'r') as f:
-            for l in f:
-                l = l.rstrip()
-                if not l:
-                    continue
-                matches = matcher.findall(l)
-                if matches:
-                    key = matches[0]
-                    stream[key] = []
-                    # Case where the kw are strings after the key
-                    if len(matches) > 1:
-                        stream[key] = matches[1:]
-                    # Case where the kw are numeric types after the key
-                    else:
-                        nums = numeric_matcher.findall(l)
-                        if len(nums) == 1:
-                            stream[key] = float(nums[0])
-                        else:
-                            stream[key] += map(float, nums)
-                else:
-                    # Case where the values are on a newline after the key
-                    nums = numeric_matcher.findall(l)
-                    stream[key] += map(float, nums)
+    data = {}
+
+    def load(source):
+        # An existing path is read from disk; anything else is raw text
+        if os.path.exists(source):
+            with open(source, 'r') as f:
+                return f.readlines()
+        return source.split('\n')
+
+    def parse(lines):
+        key = None
+        for line in lines:
+            elems = line.split()
+            if not elems:
+                continue
+            if is_number(elems[0]):
+                # Continuation line: more values for the most recent key
+                if key is None:
+                    raise ValueError('No key precedes values: {}'.format(line.strip()))
+                data[key] += [convert_string_to_float(e) for e in elems]
+                continue
+            key = elems[0]
+            if key in data:
+                raise ValueError('Duplicate dictionary key: {}'.format(key))
+            values = [convert_string_to_float(e) for e in elems[1:]]
+            # A single value is stored as a scalar, anything else as a list
+            data[key] = values[0] if len(values) == 1 else values
 
-    parse(keywords)
+    parse(load(keywords))
     if ell:
-        parse(ell)
-    return json.dumps(stream)
+        parse(load(ell))
+
+    return data
 
 @singledispatch
 def read_ipf(arg): # pragma: no cover

diff --git a/plio/io/tests/test_io_bae.py b/plio/io/tests/test_io_bae.py
@@ -5,7 +5,7 @@
 import pandas as pd
 from pandas.util.testing import assert_frame_equal
 
-from plio.io.io_bae import socetset_keywords_to_json, read_gpf, save_gpf, read_ipf, save_ipf
+from plio.io.io_bae import socetset_keywords_to_dict, read_gpf, save_gpf, read_ipf, save_ipf
 from plio.examples import get_path
 
 import pytest
@@ -89,13 +89,47 @@ def test_write_gpf(gpf, file):
 
     # np.testing.assert_array_almost_equal(truth_arr, test_arr)
 
-def test_create_from_socet_lis():
-    socetlis = get_path('socet_isd.lis')
-    socetell = get_path('ellipsoid.ell')
-    js = json.loads(socetset_keywords_to_json(socetlis))
-    assert isinstance(js, dict)  # This is essentially a JSON linter
-    # Manually validated
-    assert 'RECTIFICATION_TERMS' in js.keys()
-    assert 'SEMI_MAJOR_AXIS' in js.keys()  # From ellipsoid file
-    assert 'NUMBER_OF_EPHEM' in js.keys()
-    assert len(js['EPHEM_PTS']) / 3 == js['NUMBER_OF_EPHEM']
+class TestISDFromSocetLis():
+
+    def test_parse_with_empty_newlines(self):
+        """Blank lines between entries must not hide any key."""
+        raw = r"""T0_QUAT 1.0000000000000000000000000e-01
+
+T1_QUAT 1.0000000000000000000000000e-01"""
+        parsed = socetset_keywords_to_dict(raw)
+        assert len(parsed) == 2
+
+    def test_duplicate_key_check(self):
+        """A key that appears twice is rejected with ValueError."""
+        with pytest.raises(ValueError):
+            socetset_keywords_to_dict(r"""T 1
+T 1""")
+
+    def test_multiple_per_line(self):
+        """Several values after a key are all collected."""
+        parsed = socetset_keywords_to_dict(r"""T 1 1 1""")
+        assert len(parsed['T']) == 3
+
+    def test_key_on_different_line(self):
+        """Values on the lines after a bare key belong to that key."""
+        raw = r"""A
+0.0 1.00000000000000e+00 2.00000000000000e+00
+3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00
+B 1.0e-01 2.000000e+00 3.00000000000000e+00"""
+        parsed = socetset_keywords_to_dict(raw)
+        assert len(parsed['A']) == 6
+        assert parsed['A'] == [0, 1, 2, 3, 4, 5]
+        assert len(parsed['B']) == 3
+        assert parsed['B'] == [0.1, 2, 3]
+
+    def test_key_on_different_line_whitespace(self):
+        """Indented continuation lines are handled like unindented ones."""
+        raw = r"""A
+    0.0 1.00000000000000e+00 2.00000000000000e+00
+    3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00
+B 1.0e-01 2.000000e+00 3.00000000000000e+00"""
+        parsed = socetset_keywords_to_dict(raw)
+        assert len(parsed['A']) == 6
+        assert parsed['A'] == [0, 1, 2, 3, 4, 5]
+        assert len(parsed['B']) == 3
+        assert parsed['B'] == [0.1, 2, 3]
diff --git a/plio/utils/utils.py b/plio/utils/utils.py
@@ -9,6 +9,48 @@
 
 import numpy as np
 
+def is_number(s):
+    """
+    Check if an argument is convertible to a float
+
+    Parameters
+    ----------
+    s : object
+        The argument to check for conversion
+
+    Returns
+    -------
+     : bool
+       True if float(s) succeeds, otherwise False.
+    """
+    try:
+        float(s)
+        return True
+    except (TypeError, ValueError):  # TypeError covers input such as None
+        return False
+
+def convert_string_to_float(s):
+    """
+    Attempt to convert a string to a float.
+
+    Parameters
+    ----------
+    s : str
+        The string to convert
+
+    Returns
+    -------
+    : float / str
+      If successful, the converted value, else the argument is passed back
+      out.
+    """
+    # Non-numeric text raises ValueError; non-string input raises TypeError
+    try:
+        return float(s)
+    except (TypeError, ValueError):
+        return s
+
+
 def metadatatoband(metadata):
     wv2band = []
     for k, v in metadata.items():