Merge pull request #70 from ChalkLab/50-add-dataseries-to-rruff-reader

Adds dataseries to rruff reader and writer
chalklab · Mar 1, 2022 · fa23070
2 parents f9d006b + 35dc819
commit fa23070
Show file tree

Hide file tree

Showing 7 changed files with 414 additions and 163 deletions.
diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml
@@ -48,23 +48,23 @@ jobs:
             pip install coverage
             bash <(curl -s https://codecov.io/bash)
 
-      - name: Build Python package and Upload to PyPi
+      - name: Build Python package and Upload to TestPyPi
         shell: bash -l {0}
         if: startsWith( github.ref, 'refs/tags/v') && matrix.python-version == env.PYTHON_MAIN_VERSION
         env:
-          PYPI_TOKEN_PASSWORD: ${{ secrets.PYPI_TOKEN }}
+          TEST_PYPI_TOKEN_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }}
         run: |
           poetry update
           poetry build
-          poetry publish --username "__token__" --password $PYPI_TOKEN_PASSWORD
+          poetry config repositories.testpypi https://test.pypi.org/legacy/
+          poetry publish -r testpypi --username "__token__" --password $TEST_PYPI_TOKEN_PASSWORD
 
-      - name: Build Python package and Upload to TestPyPi
+      - name: Build Python package and Upload to PyPi
         shell: bash -l {0}
-        if: ${{ matrix.python-version == env.PYTHON_MAIN_VERSION }}
+        if: startsWith( github.ref, 'refs/tags/v') && matrix.python-version == env.PYTHON_MAIN_VERSION
         env:
-          TEST_PYPI_TOKEN_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }}
+          PYPI_TOKEN_PASSWORD: ${{ secrets.PYPI_TOKEN }}
         run: |
           poetry update
           poetry build
-          poetry config repositories.testpypi https://test.pypi.org/legacy/
-          poetry publish -r testpypi --username "__token__" --password $TEST_PYPI_TOKEN_PASSWORD
+          poetry publish --username "__token__" --password $PYPI_TOKEN_PASSWORD
diff --git a/.gitignore b/.gitignore
@@ -75,5 +75,161 @@ fabric.properties
 # any other files in the .idea folder
 .idea/*
 
-# coverage report
+# Created by https://www.toptal.com/developers/gitignore/api/python
+# Edit at https://www.toptal.com/developers/gitignore?templates=python
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
 .coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# End of https://www.toptal.com/developers/gitignore/api/python
diff --git a/scidatalib/io/jcamp.py b/scidatalib/io/jcamp.py
@@ -885,7 +885,7 @@ def _read_get_datagroup_subsection(jcamp_dict: dict) -> List[dict]:
 
     # Create data group
     datagroup = {
-        "@id": "datagroup/1",
+        "@id": "datagroup",
         "type": "sdo:datagroup",
         "attribute": [
             attr_count,
@@ -904,8 +904,6 @@ def _read_get_datagroup_subsection(jcamp_dict: dict) -> List[dict]:
     return datagroup
 
 
-# TODO: add the dataseries
-#   Issue: https://github.com/ChalkLab/SciDataLib/issues/43
 def _read_get_dataseries_subsection(jcamp_dict: dict) -> List[dict]:
     """
     Extract and translate from the JCAMP-DX dictionary the SciData JSON-LD
@@ -920,41 +918,27 @@ def _read_get_dataseries_subsection(jcamp_dict: dict) -> List[dict]:
 
     dataseries = [
         {
-            "@id": "dataseries/1/",
-            "@type": "sdo:independent",
-            "label": "Wave Numbers (cm^-1)",
-            "axis": "x-axis",
-            "parameter": {
-                "@id": "dataseries/1/parameter/",
-                "@type": "sdo:parameter",
-                "quantity": "wavenumbers",
-                "property": "Wave Numbers",
-                "valuearray": {
-                    "@id": "dataseries/1/parameter/valuearray/",
-                    "@type": "sdo:valuearray",
+            "@id": "dataseries",
+            "label": "Spectroscopy",
+            "parameter": [
+                {
+                    "@id": "parameter",
+                    "quantity": "Wave Numbers",
+                    "property": "Wave Numbers",
+                    "units": "1/cm",
+                    "axis": "x-axis",
                     "datatype": "decimal",
-                    "numberarray": jcamp_dict['x'],
+                    "dataarray": [str(x) for x in jcamp_dict['x']],
                     "unitref": xunitref,
-                }
-            }
-        },
-        {
-            "@id": "dataseries/2/",
-            "@type": "sdo:dependent",
-            "label": "Intensity (Arbitrary Units)",
-            "axis": "y-axis",
-            "parameter": {
-                "@id": "dataseries/2/parameter/",
-                "@type": "sdo:parameter",
-                "quantity": "intensity",
-                "property": "Intensity",
-                "valuearray": {
-                    "@id": "dataseries/2/parameter/valuearray/",
-                    "@type": "sdo:valuearray",
+                }, {
+                    "@id": "parameter",
+                    "quantity": "Intensity (Arbitrary Units)",
+                    "property": "Intensity (Arbitrary Units)",
+                    "axis": "y-axis",
                     "datatype": "decimal",
-                    "numberarray": jcamp_dict['y']
+                    "dataarray": [str(y) for y in jcamp_dict['y']],
                 }
-            }
+            ]
         }
     ]
 
@@ -1028,8 +1012,8 @@ def _read_translate_jcamp_to_scidata(jcamp_dict: dict) -> SciData:
     datagroup = _read_get_datagroup_subsection(jcamp_dict)
     scidata.datagroup([datagroup])
 
-    # TODO: add the dataseries
-    #   Issue: https://github.com/ChalkLab/SciDataLib/issues/43
+    dataseries = _read_get_dataseries_subsection(jcamp_dict)
+    scidata.dataseries(dataseries)
 
     return scidata
 
@@ -1214,7 +1198,8 @@ def _write_add_header_lines_dataset(scidata: SciData) -> List[str]:
     dataset = graph.get("scidata").get("dataset", False)
 
     if dataset:
-        attributes = dataset.get("attribute", False)
+        datagroup = dataset.get("datagroup")[0]
+        attributes = datagroup.get("attribute", False)
 
         reverse_xunit_map = {v: k for k, v in _XUNIT_MAP.items()}
         scidata_xunits = attributes[0]["value"]["unitref"]
@@ -1308,21 +1293,18 @@ def _write_jcamp_data_section(
     :param mode: File mode to use (i.e. 'w' for overwrite, 'a' for append, ...)
     :param precision: Floating point number for formatting the output data
     """
-    pass
-    # TODO: add the dataseries
-    #   Issue: https://github.com/ChalkLab/SciDataLib/issues/43
-    '''
-    graph = scidata.output.get("@graph")
-    dataset = graph.get("scidata").get("dataset")
-    dataseries = dataset.get("dataseries")
+
+    dataset = scidata.output.get("@graph").get("scidata").get("dataset")
+    dataseries = dataset.get("dataseries")[0]
+    parameters = dataseries.get("parameter")
     with open(filename, mode) as fileobj:
         xdata = []
         ydata = []
-        for data in dataseries:
+        for data in parameters:
             if data.get("axis") == "x-axis":
-                xdata = data["parameter"]["valuearray"]["numberarray"]
+                xdata = [float(x) for x in data["dataarray"]]
             if data.get("axis") == "y-axis":
-                ydata = data["parameter"]["valuearray"]["numberarray"]
+                ydata = [float(y) for y in data["dataarray"]]
 
         for x, y in zip(xdata, ydata):
             line = f' {x:.{precision}f},   {y:.{precision}f}'
@@ -1331,4 +1313,3 @@ def _write_jcamp_data_section(
                 yline = f'{y:.{precision}f}'[0:trim]
                 line = f' {xline},   {yline}'
             fileobj.write(f'{line}\n')
-    '''
diff --git a/scidatalib/io/rruff.py b/scidatalib/io/rruff.py
@@ -220,8 +220,8 @@ def _read_translate_rruff_to_scidata(rruff_dict: dict) -> dict:
     datagroup = jcamp._read_get_datagroup_subsection(rruff_dict)
     scidata.datagroup([datagroup])
 
-    # TODO: add the dataseries
-    #   Issue: https://github.com/ChalkLab/SciDataLib/issues/43
+    dataseries = jcamp._read_get_dataseries_subsection(rruff_dict)
+    scidata.dataseries(dataseries)
 
     return scidata
 

diff --git a/tests/io/test_jcamp.py b/tests/io/test_jcamp.py
@@ -597,21 +597,21 @@ def test_read_get_facets_section():
 
 
 def test_read_get_dataseries_subsection():
-    x = np.arange(0., 10., 0.1)
-    y = np.random.random(len(x))
+    xset = [str(x) for x in np.arange(0., 10., 0.1)]
+    yset = [str(y) for y in np.random.random(len(xset))]
     jcamp_dict = {
-        "x": x,
-        "y": y,
+        "x": xset,
+        "y": yset,
         "xunits": "1/CM",
     }
     result = jcamp._read_get_dataseries_subsection(jcamp_dict)
 
-    result_x = result[0]["parameter"]["valuearray"]["numberarray"]
-    result_y = result[1]["parameter"]["valuearray"]["numberarray"]
-    result_xunit = result[0]["parameter"]["valuearray"]["unitref"]
+    result_x = result[0]["parameter"][0]["dataarray"]
+    result_y = result[0]["parameter"][1]["dataarray"]
+    result_xunit = result[0]["parameter"][0]["unitref"]
 
-    assert list(x) == list(result_x)
-    assert list(y) == list(result_y)
+    assert list(xset) == list(result_x)
+    assert list(yset) == list(result_y)
     assert "qudt:PER-CentiM" == result_xunit
 
 
@@ -697,6 +697,10 @@ def test_read_jcamp_function(raman_tannic_acid_file):
     scidata_obj = jcamp.read_jcamp(raman_tannic_acid_file)
     assert type(scidata_obj) == SciData
 
+    dataset = scidata_obj.output.get("@graph").get("scidata").get("dataset")
+    dataseries = dataset.get("dataseries")[0]
+    assert dataseries.get("@id") == "dataseries/1/"
+
 
 def test_read_jcamp(raman_tannic_acid_file):
     scidata_obj = read(raman_tannic_acid_file, ioformat="jcamp")
@@ -729,7 +733,6 @@ def test_write_jcamp_function(tmp_path, raman_tannic_acid_file):
         assert result_list == target_list
 
 
-@pytest.mark.skip(reason="Missing dataseries for comparison")
 def test_write_jcamp(tmp_path, raman_tannic_acid_file):
     scidata = jcamp.read_jcamp(raman_tannic_acid_file.resolve())
     jcamp_dir = tmp_path / "jcamp"

diff --git a/tests/io/test_rruff.py b/tests/io/test_rruff.py
@@ -78,7 +78,6 @@ def test_read_rruff(raman_soddyite_file):
     assert facet["name"] == "Soddyite"
 
 
-@pytest.mark.skip(reason="Missing dataseries for comparison")
 def test_write_rruff(tmp_path, raman_soddyite_file):
     scidata = rruff.read_rruff(raman_soddyite_file.absolute())
     rruff_dir = tmp_path / "rruff"