Skip to content

Commit

Permalink
Merge pull request #70 from ChalkLab/50-add-dataseries-to-rruff-reader
Browse files: browse the repository at this point in the history
Adds dataseries to rruff reader and writer
  • Loading branch information
JohnsonDylan committed Mar 1, 2022
2 parents f9d006b + 35dc819 commit fa23070
Show file tree
Hide file tree
Showing 7 changed files with 414 additions and 163 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,23 +48,23 @@ jobs:
pip install coverage
bash <(curl -s https://codecov.io/bash)
- name: Build Python package and Upload to PyPi
- name: Build Python package and Upload to TestPyPi
shell: bash -l {0}
if: startsWith( github.ref, 'refs/tags/v') && matrix.python-version == env.PYTHON_MAIN_VERSION
env:
PYPI_TOKEN_PASSWORD: ${{ secrets.PYPI_TOKEN }}
TEST_PYPI_TOKEN_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }}
run: |
poetry update
poetry build
poetry publish --username "__token__" --password $PYPI_TOKEN_PASSWORD
poetry config repositories.testpypi https://test.pypi.org/legacy/
poetry publish -r testpypi --username "__token__" --password $TEST_PYPI_TOKEN_PASSWORD
- name: Build Python package and Upload to TestPyPi
- name: Build Python package and Upload to PyPi
shell: bash -l {0}
if: ${{ matrix.python-version == env.PYTHON_MAIN_VERSION }}
if: startsWith( github.ref, 'refs/tags/v') && matrix.python-version == env.PYTHON_MAIN_VERSION
env:
TEST_PYPI_TOKEN_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }}
PYPI_TOKEN_PASSWORD: ${{ secrets.PYPI_TOKEN }}
run: |
poetry update
poetry build
poetry config repositories.testpypi https://test.pypi.org/legacy/
poetry publish -r testpypi --username "__token__" --password $TEST_PYPI_TOKEN_PASSWORD
poetry publish --username "__token__" --password $PYPI_TOKEN_PASSWORD
158 changes: 157 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,161 @@ fabric.properties
# any other files in the .idea folder
.idea/*

# coverage report
# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# End of https://www.toptal.com/developers/gitignore/api/python
77 changes: 29 additions & 48 deletions scidatalib/io/jcamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -885,7 +885,7 @@ def _read_get_datagroup_subsection(jcamp_dict: dict) -> List[dict]:

# Create data group
datagroup = {
"@id": "datagroup/1",
"@id": "datagroup",
"type": "sdo:datagroup",
"attribute": [
attr_count,
Expand All @@ -904,8 +904,6 @@ def _read_get_datagroup_subsection(jcamp_dict: dict) -> List[dict]:
return datagroup


# TODO: add the dataseries
# Issue: https://github.com/ChalkLab/SciDataLib/issues/43
def _read_get_dataseries_subsection(jcamp_dict: dict) -> List[dict]:
"""
Extract and translate from the JCAMP-DX dictionary the SciData JSON-LD
Expand All @@ -920,41 +918,27 @@ def _read_get_dataseries_subsection(jcamp_dict: dict) -> List[dict]:

dataseries = [
{
"@id": "dataseries/1/",
"@type": "sdo:independent",
"label": "Wave Numbers (cm^-1)",
"axis": "x-axis",
"parameter": {
"@id": "dataseries/1/parameter/",
"@type": "sdo:parameter",
"quantity": "wavenumbers",
"property": "Wave Numbers",
"valuearray": {
"@id": "dataseries/1/parameter/valuearray/",
"@type": "sdo:valuearray",
"@id": "dataseries",
"label": "Spectroscopy",
"parameter": [
{
"@id": "parameter",
"quantity": "Wave Numbers",
"property": "Wave Numbers",
"units": "1/cm",
"axis": "x-axis",
"datatype": "decimal",
"numberarray": jcamp_dict['x'],
"dataarray": [str(x) for x in jcamp_dict['x']],
"unitref": xunitref,
}
}
},
{
"@id": "dataseries/2/",
"@type": "sdo:dependent",
"label": "Intensity (Arbitrary Units)",
"axis": "y-axis",
"parameter": {
"@id": "dataseries/2/parameter/",
"@type": "sdo:parameter",
"quantity": "intensity",
"property": "Intensity",
"valuearray": {
"@id": "dataseries/2/parameter/valuearray/",
"@type": "sdo:valuearray",
}, {
"@id": "parameter",
"quantity": "Intensity (Arbitrary Units)",
"property": "Intensity (Arbitrary Units)",
"axis": "y-axis",
"datatype": "decimal",
"numberarray": jcamp_dict['y']
"dataarray": [str(y) for y in jcamp_dict['y']],
}
}
]
}
]

Expand Down Expand Up @@ -1028,8 +1012,8 @@ def _read_translate_jcamp_to_scidata(jcamp_dict: dict) -> SciData:
datagroup = _read_get_datagroup_subsection(jcamp_dict)
scidata.datagroup([datagroup])

# TODO: add the dataseries
# Issue: https://github.com/ChalkLab/SciDataLib/issues/43
dataseries = _read_get_dataseries_subsection(jcamp_dict)
scidata.dataseries(dataseries)

return scidata

Expand Down Expand Up @@ -1214,7 +1198,8 @@ def _write_add_header_lines_dataset(scidata: SciData) -> List[str]:
dataset = graph.get("scidata").get("dataset", False)

if dataset:
attributes = dataset.get("attribute", False)
datagroup = dataset.get("datagroup")[0]
attributes = datagroup.get("attribute", False)

reverse_xunit_map = {v: k for k, v in _XUNIT_MAP.items()}
scidata_xunits = attributes[0]["value"]["unitref"]
Expand Down Expand Up @@ -1308,21 +1293,18 @@ def _write_jcamp_data_section(
:param mode: File mode to use (i.e. 'w' for overwrite, 'a' for append, ...)
:param precision: Floating point number for formatting the output data
"""
pass
# TODO: add the dataseries
# Issue: https://github.com/ChalkLab/SciDataLib/issues/43
'''
graph = scidata.output.get("@graph")
dataset = graph.get("scidata").get("dataset")
dataseries = dataset.get("dataseries")

dataset = scidata.output.get("@graph").get("scidata").get("dataset")
dataseries = dataset.get("dataseries")[0]
parameters = dataseries.get("parameter")
with open(filename, mode) as fileobj:
xdata = []
ydata = []
for data in dataseries:
for data in parameters:
if data.get("axis") == "x-axis":
xdata = data["parameter"]["valuearray"]["numberarray"]
xdata = [float(x) for x in data["dataarray"]]
if data.get("axis") == "y-axis":
ydata = data["parameter"]["valuearray"]["numberarray"]
ydata = [float(y) for y in data["dataarray"]]

for x, y in zip(xdata, ydata):
line = f' {x:.{precision}f}, {y:.{precision}f}'
Expand All @@ -1331,4 +1313,3 @@ def _write_jcamp_data_section(
yline = f'{y:.{precision}f}'[0:trim]
line = f' {xline}, {yline}'
fileobj.write(f'{line}\n')
'''
4 changes: 2 additions & 2 deletions scidatalib/io/rruff.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ def _read_translate_rruff_to_scidata(rruff_dict: dict) -> dict:
datagroup = jcamp._read_get_datagroup_subsection(rruff_dict)
scidata.datagroup([datagroup])

# TODO: add the dataseries
# Issue: https://github.com/ChalkLab/SciDataLib/issues/43
dataseries = jcamp._read_get_dataseries_subsection(rruff_dict)
scidata.dataseries(dataseries)

return scidata

Expand Down
23 changes: 13 additions & 10 deletions tests/io/test_jcamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,21 +597,21 @@ def test_read_get_facets_section():


def test_read_get_dataseries_subsection():
x = np.arange(0., 10., 0.1)
y = np.random.random(len(x))
xset = [str(x) for x in np.arange(0., 10., 0.1)]
yset = [str(y) for y in np.random.random(len(xset))]
jcamp_dict = {
"x": x,
"y": y,
"x": xset,
"y": yset,
"xunits": "1/CM",
}
result = jcamp._read_get_dataseries_subsection(jcamp_dict)

result_x = result[0]["parameter"]["valuearray"]["numberarray"]
result_y = result[1]["parameter"]["valuearray"]["numberarray"]
result_xunit = result[0]["parameter"]["valuearray"]["unitref"]
result_x = result[0]["parameter"][0]["dataarray"]
result_y = result[0]["parameter"][1]["dataarray"]
result_xunit = result[0]["parameter"][0]["unitref"]

assert list(x) == list(result_x)
assert list(y) == list(result_y)
assert list(xset) == list(result_x)
assert list(yset) == list(result_y)
assert "qudt:PER-CentiM" == result_xunit


Expand Down Expand Up @@ -697,6 +697,10 @@ def test_read_jcamp_function(raman_tannic_acid_file):
scidata_obj = jcamp.read_jcamp(raman_tannic_acid_file)
assert type(scidata_obj) == SciData

dataset = scidata_obj.output.get("@graph").get("scidata").get("dataset")
dataseries = dataset.get("dataseries")[0]
assert dataseries.get("@id") == "dataseries/1/"


def test_read_jcamp(raman_tannic_acid_file):
scidata_obj = read(raman_tannic_acid_file, ioformat="jcamp")
Expand Down Expand Up @@ -729,7 +733,6 @@ def test_write_jcamp_function(tmp_path, raman_tannic_acid_file):
assert result_list == target_list


@pytest.mark.skip(reason="Missing dataseries for comparison")
def test_write_jcamp(tmp_path, raman_tannic_acid_file):
scidata = jcamp.read_jcamp(raman_tannic_acid_file.resolve())
jcamp_dir = tmp_path / "jcamp"
Expand Down
1 change: 0 additions & 1 deletion tests/io/test_rruff.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def test_read_rruff(raman_soddyite_file):
assert facet["name"] == "Soddyite"


@pytest.mark.skip(reason="Missing dataseries for comparison")
def test_write_rruff(tmp_path, raman_soddyite_file):
scidata = rruff.read_rruff(raman_soddyite_file.absolute())
rruff_dir = tmp_path / "rruff"
Expand Down

0 comments on commit fa23070

Please sign in to comment.