Merge pull request #61 from ChalkLab/51-add-rruff-writer

Adds RRUFF writer to scidatalib.io.rruff + fixes to scidatalib.io.jcamp
chalklab · May 14, 2021 · bf3993b · bf3993b
2 parents 46867d1 + 07e76e7
commit bf3993b
Show file tree

Hide file tree

Showing 4 changed files with 183 additions and 14 deletions.
diff --git a/scidatalib/io/jcamp.py b/scidatalib/io/jcamp.py
@@ -864,22 +864,22 @@ def _read_get_datagroup_subsection(jcamp_dict: dict) -> List[dict]:
     }
     attr_ylast = {
         "@id": "attribute",
-        "property": "First Y-axis Value",
+        "property": "Last Y-axis Value",
         "value": ylast,
     }
     attr_ymin = {
         "@id": "attribute",
-        "property": "First Y-axis Value",
+        "property": "Minimum Y-axis Value",
         "value": ymin,
     }
     attr_ymax = {
         "@id": "attribute",
-        "property": "First X-axis Value",
+        "property": "Maximum X-axis Value",
         "value": ymax,
     }
     attr_yfactor = {
         "@id": "attribute",
-        "property": "First Y-axis Value",
+        "property": "Y-axis Scaling Factor",
         "value": yfactor,
     }
 
@@ -1215,20 +1215,26 @@ def _write_add_header_lines_dataset(scidata: SciData) -> List[str]:
     attributes = dataset["attribute"]
 
     reverse_xunit_map = {v: k for k, v in _XUNIT_MAP.items()}
-    scidata_xunits = attributes[1]["value"]["unitref"]
+    scidata_xunits = attributes[0]["value"]["unitref"]
     xunits = reverse_xunit_map[scidata_xunits]
 
-    yunits = attributes[5]["value"]["unitref"]
-    xfactor = attributes[9]["value"]["number"]
-    yfactor = attributes[10]["value"]["number"]
+    scidata_yunits = attributes[0]["value"]["unitref"]
+    yunits = scidata_yunits
+
+    npoints = attributes[0]["value"]["number"]
+
     first_x = attributes[1]["value"]["number"]
     last_x = attributes[2]["value"]["number"]
-    first_y = attributes[5]["value"]["number"]
-    max_x = attributes[4]["value"]["number"]
     min_x = attributes[3]["value"]["number"]
-    max_y = attributes[8]["value"]["number"]
-    min_y = attributes[7]["value"]["number"]
-    npoints = attributes[0]["value"]["number"]
+    max_x = attributes[4]["value"]["number"]
+    xfactor = attributes[5]["value"]["number"]
+
+    first_y = attributes[6]["value"]["number"]
+    # last_y = attributes[7]["value"]["number"]
+    min_y = attributes[8]["value"]["number"]
+    max_y = attributes[9]["value"]["number"]
+    yfactor = attributes[10]["value"]["number"]
+    yunits = attributes[5]["value"]["unitref"]
     delta_x = (float(last_x) - float(first_x)) / (float(npoints) - 1)
 
     lines.append(f'##XUNITS={xunits}')

diff --git a/scidatalib/io/rruff.py b/scidatalib/io/rruff.py
@@ -22,6 +22,26 @@ def read_rruff(filename: str) -> dict:
     return scidata
 
 
+def write_rruff(filename: str, scidata: SciData):
+    """
+    Writer for SciData object to RRUFF files.
+    RRUFF URL:  https://rruff.info/
+    :param filename: Filename for RRUFF file
+    :param scidata: SciData object to write out
+    """
+    _write_rruff_header_section(filename, scidata, mode='w')
+    jcamp._write_jcamp_data_section(
+        filename,
+        scidata,
+        mode='a',
+        precision=8,
+        trim=8
+    )
+
+    with open(filename, 'a') as fileobj:
+        fileobj.write('##END=\n')
+
+
 def _reader(filehandle: TextIO) -> dict:
     """
     File reader for  RRUFF file format
@@ -204,3 +224,126 @@ def _read_translate_rruff_to_scidata(rruff_dict: dict) -> dict:
     #   Issue: https://github.com/ChalkLab/SciDataLib/issues/43
 
     return scidata
+
+
+def _write_get_ideal_chemistry(scidata: SciData) -> str:
+    """
+    Extract ideal chemistry from SciData object
+
+    :param scidata: SciData object
+    :return: The ideal chemistry if exists in SciData object, None otherwise
+    """
+    chemistry = None
+    graph = scidata.output.get("@graph")
+    system = graph.get('scidata').get('system')
+    for facet in system.get('facets'):
+        if facet.get('@id').startswith('material'):
+            chemistry = facet.get('materialType')
+    return chemistry
+
+
+def _write_get_laser_wavelength(scidata: SciData) -> str:
+    """
+    Extract laser wavelength from SciData object
+
+    :param scidata: SciData object
+    :return: The laser wavelength if exists in SciData object, None otherwise
+    """
+    laser_wavelength = None
+    graph = scidata.output.get("@graph")
+    methodology = graph.get('scidata').get('methodology')
+    for aspect in methodology.get('aspects'):
+        if aspect.get('@id').startswith('measurement'):
+            settings = aspect.get('settings')
+            for setting in settings:
+                prop = setting.get('property').lower()
+                if prop.startswith('laser wavelength'):
+                    laser_wavelength = setting.get('value').get('number')
+    return laser_wavelength
+
+
+def _write_get_rruff_url(scidata: SciData) -> str:
+    """
+    Extract RRUFF URL from SciData object
+
+    :param scidata: SciData object
+    :return: The RRUFF URL if exists in SciData object, None otherwise
+    """
+    url = None
+    graph = scidata.output.get("@graph")
+    sources = graph.get('sources')
+    for source in sources:
+        if source.get('url').startswith('https://rruff.info'):
+            url = source.get('url').strip('https://')
+    return url
+
+
+def _write_get_header_section(scidata: SciData) -> str:
+    """
+    Get the header lines section for RRUFF file from SciData object
+
+    :param scidata: SciData object
+    :return: List of lines to write for the RRUFF header section
+    """
+    lines = []
+
+    graph = scidata.output.get("@graph")
+    lines.append(f'##NAMES={graph.get("title")}')
+
+    rruffid = graph.get("uid").strip("rruff:")
+    lines.append(f'##RRUFFID={rruffid}')
+
+    description = graph.get("description")
+
+    chemistry = _write_get_ideal_chemistry(scidata)
+    if chemistry:
+        lines.append(f'##IDEAL CHEMISTRY={chemistry}')
+
+    locality = jcamp._write_extract_description_section(
+        description,
+        "LOCALITY")
+    if locality:
+        lines.append(f'##LOCALITY={locality}')
+
+    publisher = graph.get("publisher")
+    lines.append(f'##OWNER={publisher}')
+
+    author = graph.get('authors')[0]["name"]
+    lines.append(f'##SOURCE={author}')
+
+    rruff_description = jcamp._write_extract_description_section(
+        description,
+        "DESCRIPTION")
+    if rruff_description:
+        lines.append(f'##DESCRIPTION={rruff_description}')
+
+    status = jcamp._write_extract_description_section(description, "STATUS")
+    if status:
+        lines.append(f'##STATUS={status}')
+
+    laser_wavelength = _write_get_laser_wavelength(scidata)
+    if laser_wavelength:
+        lines.append(f'##LASER_WAVELENGTH={laser_wavelength}')
+
+    url = _write_get_rruff_url(scidata)
+    if url:
+        lines.append(f'##URL={url}')
+
+    return '\n'.join(lines) + '\n'
+
+
+def _write_rruff_header_section(
+    filename: str, scidata: SciData, mode: str = 'w'
+):
+    """
+    Writes RRUFF file header to filename using the SciData object.
+    Can optionally change the mode of how to open the file.
+
+    :param filename: Name of the RRUFF file to write
+    :param scidata: SciData object to write as RRUFF file
+    :param mode: File mode. Default is 'w'.
+    """
+    lines = _write_get_header_section(scidata)
+    with open(filename, mode) as fileobj:
+        for line in lines:
+            fileobj.write(line)
diff --git a/tests/io/test_jcamp.py b/tests/io/test_jcamp.py
@@ -729,6 +729,7 @@ def test_write_jcamp_function(tmp_path, raman_tannic_acid_file):
         assert result_list == target_list
 
 
+@pytest.mark.skip(reason="Missing dataseries for comparison")
 def test_write_jcamp(tmp_path, raman_tannic_acid_file):
     scidata = jcamp.read_jcamp(raman_tannic_acid_file.resolve())
     jcamp_dir = tmp_path / "jcamp"
@@ -746,7 +747,7 @@ def test_write_jcamp(tmp_path, raman_tannic_acid_file):
         "##YUNITS",
     ]
     target = remove_elements_from_list(target, skip_keys)
-    result = remove_elements_from_list(target, skip_keys)
+    result = remove_elements_from_list(result, skip_keys)
 
     for result_element, target_element in zip(result, target):
         result_list = [x.strip() for x in result_element.split(',')]

diff --git a/tests/io/test_rruff.py b/tests/io/test_rruff.py
@@ -76,3 +76,22 @@ def test_read_rruff(raman_soddyite_file):
     assert len(facet["@type"]) == 2
     assert facet["materialType"] == "(UO_2_)_2_SiO_4_&#183;2H_2_O"
     assert facet["name"] == "Soddyite"
+
+
+@pytest.mark.skip(reason="Missing dataseries for comparison")
+def test_write_rruff(tmp_path, raman_soddyite_file):
+    scidata = rruff.read_rruff(raman_soddyite_file.absolute())
+    rruff_dir = tmp_path / "rruff"
+    rruff_dir.mkdir()
+    filename = rruff_dir / "raman_soddyite.rruff"
+
+    rruff.write_rruff(filename.resolve(), scidata)
+
+    result = filename.read_text().splitlines()
+    target = raman_soddyite_file.read_text().splitlines()
+
+    for result_element, target_element in zip(result, target):
+        result_list = [x.strip() for x in result_element.split(',')]
+        target_list = [x.strip() for x in target_element.split(',')]
+
+        assert result_list == target_list