In [3]:
from mdmodels import DataModel
my_model = DataModel.from_markdown("/home/sga/Code/FAIRFluids/FAIRFluids/specifications/model.md")
my_model.info()

In [4]:
class ExtendedFAIRFluidsDocument(my_model.FAIRFluidsDocument):
    def sum_num_values(self):
        """Sum all numerical property values across all fluids and store in compound 1's SELFIES field"""
        total = 0.0
        if hasattr(self, 'fluid') and self.fluid:
            for fluid in self.fluid:
                if fluid.num_value and fluid.num_value.propertyValue:
                    total += fluid.num_value.propertyValue.propValue
        
        
    def add_selfies_to_compound(self, compound_id, selfies_string):
        """Add SELFIES string to compound"""
        if hasattr(self, 'compound') and self.compound and len(self.compound) > 0:
            self.compound[compound_id].SELFIE = selfies_string
        
        
    def get_data_from_cml(self, path_to_cml):
        """Create FAIRFluids document from CML file"""
        import xml.etree.ElementTree as ET
        
        # Parse XML file
        tree = ET.parse(path_to_cml)
        root = tree.getroot()
        
        # Create dictionary to store DOIs and their associated properties
        doi_properties = {}

        # Find all experiment modules
        for experiment in root.findall(".//{http://www.xml-cml.org/schema}module[@dictRef='des:experiment']"):
            # Get properties list
            properties = {}
            for prop in experiment.find(".//{http://www.xml-cml.org/schema}propertyList"):
                prop_type = prop.get('dictRef').split(':')[1]
                scalar = prop.find(".//{http://www.xml-cml.org/schema}scalar")
                if scalar is not None:
                    properties[prop_type] = scalar.text
            
            # Get parameters which will become constraints
            parameters = {}
            param_list = experiment.find(".//{http://www.xml-cml.org/schema}parameterList")
            if param_list is not None:
                for param in param_list:
                    param_type = param.get('dictRef').split(':')[1]
                    scalar = param.find(".//{http://www.xml-cml.org/schema}scalar")
                    if scalar is not None:
                        parameters[param_type] = scalar.text

            # Add to doi_properties dictionary
            doi = properties.get('DOI')
            if doi:
                if doi not in doi_properties:
                    doi_properties[doi] = []
                doi_properties[doi].append({**properties, **parameters})

        # Create a fluid for each DOI
        for doi, experiments in doi_properties.items():
            for exp in experiments:
                constraints = []
                
                # Dynamically create constraints from parameters
                for param_name, param_value in exp.items():
                    if param_name == 'temperature':
                        constraints.append(my_model.Constraint(
                            constraint_type=my_model.ConstraintVariableType(
                                e_Temperature="Temperature, K"
                            ),
                            constraint_digits=3,
                            constraint_value=float(param_value),
                            constraint_number=len(constraints) + 1
                        ))
                    elif param_name == 'molar_ratio_of_DES':
                        # Create two instances for molar ratio
                        constraints.append(my_model.Constraint(
                            constraint_type=my_model.ConstraintVariableType(
                                e_component_composition= "'Mole fraction'"
                            ),
                            constraint_digits=3,
                            constraint_value=float(param_value),
                            constraint_number=len(constraints) + 1
                        ))
                        constraints.append(my_model.Constraint(
                            constraint_type=my_model.ConstraintVariableType(
                                e_component_composition="'Mole fraction'"
                            ),
                            constraint_digits=3,
                            constraint_value=float(param_value) ** -1,
                            constraint_number=len(constraints) + 1
                        ))
                    elif param_name == 'mole_fraction_of_water':
                        constraints.append(my_model.Constraint(
                            constraint_type=my_model.ConstraintVariableType(
                                e_component_composition="'Mole fraction'"
                            ),
                            constraint_digits=3,
                            constraint_value=float(param_value),
                            constraint_number=len(constraints) + 1
                        ))

                self.add_to_fluid(
                    source_doi=doi,
                    property=my_model.Property(
                        propertyID=f"viscosity_{exp.get('ID', '001')}",
                        property_group=my_model.Property_Group(
                            group="TransportProp",
                            method="experimental",
                            property_name="Dynamic viscosity, mPa·s"
                        )
                    ),
                    constraint=constraints,
                    num_value=my_model.NumValue(
                        propertyValue=my_model.PropertyValue(
                            propDigits=4,
                            propNumber=f"prop_{exp.get('ID', '001')}",
                            propValue=float(exp.get('value_viscosity', 0)),
                            uncertainty=float(exp.get('error_viscosity', 0)) if exp.get('error_viscosity') != 'NG' else 0
                        ),
                        variableValue=my_model.VariableValue(
                            varDigits=2,
                            varNumber=f"var_{exp.get('ID', '001')}",
                            varValue=float(exp.get('temperature', 0))
                        )
                    )
                )
        
        return self




In [5]:
class ExtendedFAIRFluidsDocument(my_model.FAIRFluidsDocument):
    def sum_num_values(self):
        """Sum all numerical property values across all fluids and store in compound 1's SELFIES field"""
        total = 0.0
        if hasattr(self, 'fluid') and self.fluid:
            for fluid in self.fluid:
                if fluid.num_value and fluid.num_value.propertyValue:
                    total += fluid.num_value.propertyValue.propValue
        
        
    def add_selfies_to_compound(self, compound_id, selfies_string):
        """Add SELFIES string to compound"""
        if hasattr(self, 'compound') and self.compound and len(self.compound) > 0:
            self.compound[compound_id].SELFIE = selfies_string
            
    def _parse_cml_file(self, path_to_cml):
        """Parse CML file and return root element"""
        import xml.etree.ElementTree as ET
        tree = ET.parse(path_to_cml)
        return tree.getroot()
        
    def _extract_properties(self, experiment):
        """Extract properties from experiment module"""
        properties = {}
        for prop in experiment.find(".//{http://www.xml-cml.org/schema}propertyList"):
            prop_type = prop.get('dictRef').split(':')[1]
            scalar = prop.find(".//{http://www.xml-cml.org/schema}scalar")
            if scalar is not None:
                properties[prop_type] = scalar.text
        return properties
        
    def _extract_parameters(self, experiment):
        """Extract parameters from experiment module"""
        parameters = {}
        param_list = experiment.find(".//{http://www.xml-cml.org/schema}parameterList")
        if param_list is not None:
            for param in param_list:
                param_type = param.get('dictRef').split(':')[1]
                scalar = param.find(".//{http://www.xml-cml.org/schema}scalar")
                if scalar is not None:
                    parameters[param_type] = scalar.text
        return parameters
        
    def _create_temperature_constraint(self, param_value, constraint_number):
        """Create temperature constraint"""
        return my_model.Constraint(
            constraint_type=my_model.ConstraintVariableType(
                e_Temperature="Temperature, K"
            ),
            constraint_digits=3,
            constraint_value=float(param_value),
            constraint_number=constraint_number
        )
        
    def _create_molar_ratio_constraints(self, param_value, constraint_number):
        """Create molar ratio constraints"""
        constraints = []
        constraints.append(my_model.Constraint(
            constraint_type=my_model.ConstraintVariableType(
                e_component_composition="'Mole fraction'"
            ),
            constraint_digits=3,
            constraint_value=float(param_value),
            constraint_number=constraint_number
        ))
        constraints.append(my_model.Constraint(
            constraint_type=my_model.ConstraintVariableType(
                e_component_composition="'Mole fraction'"
            ),
            constraint_digits=3,
            constraint_value=float(param_value) ** -1,
            constraint_number=constraint_number + 1
        ))
        return constraints
        
    def _create_water_fraction_constraint(self, param_value, constraint_number):
        """Create water fraction constraint"""
        return my_model.Constraint(
            constraint_type=my_model.ConstraintVariableType(
                e_component_composition="'Mole fraction'"
            ),
            constraint_digits=3,
            constraint_value=float(param_value),
            constraint_number=constraint_number
        )
        
    def _create_constraints(self, exp):
        """Create all constraints from experiment parameters"""
        constraints = []
        for param_name, param_value in exp.items():
            if param_name == 'temperature':
                constraints.append(self._create_temperature_constraint(param_value, len(constraints) + 1))
            elif param_name == 'molar_ratio_of_DES':
                constraints.extend(self._create_molar_ratio_constraints(param_value, len(constraints) + 1))
            elif param_name == 'mole_fraction_of_water':
                constraints.append(self._create_water_fraction_constraint(param_value, len(constraints) + 1))
        return constraints
        
    def _add_fluid_data(self, doi, exp, constraints):
        """Add fluid data to document"""
        self.add_to_fluid(
            source_doi=doi,
            property=my_model.Property(
                propertyID=f"viscosity_{exp.get('ID', '001')}",
                property_group=my_model.Property_Group(
                    group="TransportProp",
                    method="experimental",
                    property_name="Dynamic viscosity, mPa·s"
                )
            ),
            constraint=constraints,
            num_value=my_model.NumValue(
                propertyValue=my_model.PropertyValue(
                    propDigits=4,
                    propNumber=f"prop_{exp.get('ID', '001')}",
                    propValue=float(exp.get('value_viscosity', 0)),
                    uncertainty=float(exp.get('error_viscosity', 0)) if exp.get('error_viscosity') != 'NG' else 0
                ),
                variableValue=my_model.VariableValue(
                    varDigits=2,
                    varNumber=f"var_{exp.get('ID', '001')}",
                    varValue=float(exp.get('temperature', 0))
                )
            )
        )
        
    def get_data_from_cml(self, path_to_cml):
        """Create FAIRFluids document from CML file"""
        root = self._parse_cml_file(path_to_cml)
        
        # Create dictionary to store DOIs and their associated properties
        doi_properties = {}

        # Find all experiment modules
        for experiment in root.findall(".//{http://www.xml-cml.org/schema}module[@dictRef='des:experiment']"):
            properties = self._extract_properties(experiment)
            parameters = self._extract_parameters(experiment)

            # Add to doi_properties dictionary
            doi = properties.get('DOI')
            if doi:
                if doi not in doi_properties:
                    doi_properties[doi] = []
                doi_properties[doi].append({**properties, **parameters})

        # Create a fluid for each DOI
        for doi, experiments in doi_properties.items():
            for exp in experiments:
                constraints = self._create_constraints(exp)
                self._add_fluid_data(doi, exp, constraints)
        
        return self
    

# Create Document

In [6]:
# Test the extended class
doc = ExtendedFAIRFluidsDocument(
    version=my_model.Version(
        versionMajor=1,
        versionMinor=0
    )
)

# Add same data as example
doc.citation = my_model.Citation(Type="journal")
doc.citation.add_to_author(given_name="John", family_name="Doe")

# Add compounds and fluid data
doc.add_to_compound(
    pubChemID=962,
    commonName="Water",
    name_IUPAC="oxidane", 
    standard_InChI="InChI=1S/H2O/h1H2",
    standard_InChI_key="XLYOFNOQVPJJNP-UHFFFAOYSA-N"
)

doc.add_to_compound(
    pubChemID= 123345647789,
    commonName="CholinChloride",
    name_IUPAC="CholinChloride", 
    standard_InChI="InChI=1S/C5H11ClNO2/c1"
)

doc.add_to_compound(
    pubChemID=1128,
    commonName="Glycerol",
    name_IUPAC="glycerol",
    standard_InChI="InChI=1S/C3H8O3/c1-2-3-4/h2-3H,1H3",
    standard_InChI_key="XLYOFNOQVPJJNP-UHFFFAOYSA-N"
)

doc.add_to_fluid(
    components=["1"],
    property=my_model.Property(
        propertyID="density_001",
        property_group=my_model.Property_Group(
            group="volumetricProp",
            method="experimental", 
            property_name="Mass density, kg/m3"
        )
    ),
    variable=my_model.Variable(
        variableID="temp_001",
        variableName="Temperature, K",
        componentID=1
    ),
    num_value=my_model.NumValue(
        propertyValue=my_model.PropertyValue(
            propDigits=4,
            propNumber="prop_001", 
            propValue=998.2,
            uncertainty=0.1
        ),
        variableValue=my_model.VariableValue(
            varDigits=2,
            varNumber="var_001",
            varValue=293.15
        )
    )
)

doc.get_data_from_cml("../data/cml_xml/ChCl_glycerol.xml")
print(f"Sum of all numerical property values: {doc.sum_num_values()}")

# Save model to JSON file
with open('fairfluids_model.json', 'w') as f:
    f.write(doc.model_dump_json(indent=2))

Sum of all numerical property values: None


In [7]:
# Load JSON and validate with model
with open('fairfluids_model.json', 'r') as f:
    json_data = f.read()
    
# Create new document from JSON and validate
validated_doc = my_model.FAIRFluidsDocument.model_validate_json(json_data)


# Find viscosity values and corresponding temperatures and mole fractions
results = []
for fluid in validated_doc.fluid:
    # Check if property is viscosity
    if fluid.property.property_group.property_name == "Dynamic viscosity, mPa·s":
        temp = None
        mole_fractions = []
        viscosity = fluid.num_value.propertyValue.propValue
        
        # Get temperature and mole fraction constraints
        for constraint in fluid.constraint:
            # Check for temperature constraint
            if (constraint.constraint_type and 
                constraint.constraint_type.e_temperature == "Temperature, K"):
                temp = constraint.constraint_value
            # Check for mole fraction constraint
            elif (constraint.constraint_type and 
                  constraint.constraint_type.e_component_composition == "'Mole fraction'"):
                mole_fractions.append(constraint.constraint_value)
                
        results.append({
            'temperature': temp,
            'mole_fractions': mole_fractions,
            'viscosity': viscosity
        })

# Print results
for result in results:
    print(f"Temperature: {result['temperature']}K")
    print(f"Mole fractions: {result['mole_fractions']}")
    print(f"Viscosity: {result['viscosity']} mPa·s")
    print("---")

Temperature: NoneK
Mole fractions: []
Viscosity: 60.2 mPa·s
---
Temperature: NoneK
Mole fractions: []
Viscosity: 1.2739 mPa·s
---
Temperature: NoneK
Mole fractions: []
Viscosity: 0.9836 mPa·s
---
Temperature: NoneK
Mole fractions: []
Viscosity: 0.7914 mPa·s
---
Temperature: NoneK
Mole fractions: []
Viscosity: 0.6492 mPa·s
---
Temperature: NoneK
Mole fractions: []
Viscosity: 0.5487 mPa·s
---
Temperature: NoneK
Mole fractions: []
Viscosity: 0.477 mPa·s
---
Temperature: NoneK
Mole fractions: []
Viscosity: 0.4354 mPa·s
---
Temperature: NoneK
Mole fractions: []
Viscosity: 0.386 mPa·s
---
Temperature: NoneK
Mole fractions: []
Viscosity: 0.3306 mPa·s
---
Temperature: NoneK
Mole fractions: []
Viscosity: 1.5754 mPa·s
---
Temperature: NoneK
Mole fractions: []
Viscosity: 1.2065 mPa·s
---
Temperature: NoneK
Mole fractions: []
Viscosity: 0.9611 mPa·s
---
Temperature: NoneK
Mole fractions: []
Viscosity: 0.7902 mPa·s
---
Temperature: NoneK
Mole fractions: []
Viscosity: 0.6578 mPa·s
---
Temperature: N

In [8]:
results

[{'temperature': None, 'mole_fractions': [], 'viscosity': 60.2},
 {'temperature': None, 'mole_fractions': [], 'viscosity': 1.2739},
 {'temperature': None, 'mole_fractions': [], 'viscosity': 0.9836},
 {'temperature': None, 'mole_fractions': [], 'viscosity': 0.7914},
 {'temperature': None, 'mole_fractions': [], 'viscosity': 0.6492},
 {'temperature': None, 'mole_fractions': [], 'viscosity': 0.5487},
 {'temperature': None, 'mole_fractions': [], 'viscosity': 0.477},
 {'temperature': None, 'mole_fractions': [], 'viscosity': 0.4354},
 {'temperature': None, 'mole_fractions': [], 'viscosity': 0.386},
 {'temperature': None, 'mole_fractions': [], 'viscosity': 0.3306},
 {'temperature': None, 'mole_fractions': [], 'viscosity': 1.5754},
 {'temperature': None, 'mole_fractions': [], 'viscosity': 1.2065},
 {'temperature': None, 'mole_fractions': [], 'viscosity': 0.9611},
 {'temperature': None, 'mole_fractions': [], 'viscosity': 0.7902},
 {'temperature': None, 'mole_fractions': [], 'viscosity': 0.6578},

In [9]:
print(validated_doc.model_dump_json(indent=2))

{
  "version": {
    "versionMajor": 1,
    "versionMinor": 0
  },
  "citation": {
    "Type": "journal",
    "author": [
      {
        "given_name": "John",
        "family_name": "Doe"
      }
    ]
  },
  "compound": [
    {
      "pubChemID": 962,
      "compund_identifier": null,
      "commonName": "Water",
      "SELFIE": null,
      "name_IUPAC": "oxidane",
      "standard_InChI": "InChI=1S/H2O/h1H2",
      "standard_InChI_key": "XLYOFNOQVPJJNP-UHFFFAOYSA-N"
    },
    {
      "pubChemID": 123345647789,
      "compund_identifier": null,
      "commonName": "CholinChloride",
      "SELFIE": null,
      "name_IUPAC": "CholinChloride",
      "standard_InChI": "InChI=1S/C5H11ClNO2/c1",
      "standard_InChI_key": null
    },
    {
      "pubChemID": 1128,
      "compund_identifier": null,
      "commonName": "Glycerol",
      "SELFIE": null,
      "name_IUPAC": "glycerol",
      "standard_InChI": "InChI=1S/C3H8O3/c1-2-3-4/h2-3H,1H3",
      "standard_InChI_key": "XLYOFNOQVPJJNP-UH