In [12]:
from openff.evaluator.properties import Density

In [13]:
schema = Density.default_simulation_schema()

In [14]:
Density.default_simulation_schema().workflow_schema.protocol_schemas[0]

<openff.evaluator.workflow.schemas.ProtocolSchema at 0x767e226ea4d0>

In [15]:
schema.get_attributes()

['absolute_tolerance', 'relative_tolerance', 'workflow_schema']

In [16]:
schema.workflow_schema.get_attributes()

['protocol_schemas',
 'protocol_replicators',
 'final_value_source',
 'outputs_to_store']

In [17]:
schema.workflow_schema.protocol_schemas[0].get_attributes()

['id', 'type', 'inputs']

In [18]:
for s in schema.workflow_schema.protocol_schemas:
    print(s.id,"\n",s.type,"\n",s.inputs,"\n")

build_coordinates 
 BuildCoordinatesPackmol 
 {'.allow_merging': True, '.max_molecules': 1000, '.count_exact_amount': True, '.mass_density': <Quantity(0.95, 'gram / milliliter')>, '.box_aspect_ratio': [1.0, 1.0, 1.0], '.substance': <ProtocolPath full_path=global.substance>, '.tolerance': <Quantity(2.0, 'angstrom')>, '.verbose_packmol': False, '.retain_packmol_files': False} 

assign_parameters 
 BaseBuildSystem 
 {'.allow_merging': True, '.force_field_path': <ProtocolPath full_path=global.force_field_path>, '.coordinate_file_path': <ProtocolPath full_path=build_coordinates.coordinate_file_path>, '.substance': <ProtocolPath full_path=build_coordinates.output_substance>} 

energy_minimisation 
 OpenMMEnergyMinimisation 
 {'.allow_merging': True, '.input_coordinate_file': <ProtocolPath full_path=build_coordinates.coordinate_file_path>, '.parameterized_system': <ProtocolPath full_path=assign_parameters.parameterized_system>, '.tolerance': <Quantity(10.0, 'kilojoules_per_mole / nanometer')>

In [19]:
schema.workflow_schema.protocol_schemas[6].inputs

{'.allow_merging': True,
 '.time_series_statistics': <ProtocolPath full_path=conditional_group/average_density.time_series_statistics>,
 '.input_observables': <ProtocolPath full_path=conditional_group/production_simulation.observables>}

In [20]:
schema.workflow_schema.final_value_source.protocol_ids

('conditional_group', 'average_density')

In [21]:
schema.workflow_schema.outputs_to_store

{'full_system': <openff.evaluator.storage.data.StoredSimulationData at 0x767e226e91d0>}

In [22]:
from openff.evaluator import unit
from openff.evaluator.workflow import Workflow, ProtocolPath
from openff.evaluator.workflow.schemas import WorkflowSchema
from openff.evaluator.protocols.simulation import ProductionSimulation
from openff.evaluator.protocols.analysis import AnalyzeTrajectory
from openff.evaluator.properties.density import Density

class CustomDensityWorkflow(Density):
    @staticmethod
    def default_simulation_schema(replicas: int = 1) -> WorkflowSchema:
        # Get the default Density workflow schema
        default_schema = Density.default_simulation_schema(replicas)

        # Initialize a new schema
        schema = WorkflowSchema()

        # Step 1: Copy build_coordinates from the default schema
        build_coordinates = default_schema.workflow_schema.protocol_schemas[0]
        schema.protocol_schemas.append(build_coordinates)

        # Step 2: Copy assign_parameters from the default schema
        assign_parameters = default_schema.workflow_schema.protocol_schemas[1]
        schema.protocol_schemas.append(assign_parameters)

        # Step 3: Copy energy_minimisation from the default schema
        energy_minimization = default_schema.workflow_schema.protocol_schemas[2]
        schema.protocol_schemas.append(energy_minimization)

        # Step 4: Copy equilibration_simulation from the default schema
        equilibration = default_schema.workflow_schema.protocol_schemas[3]
        schema.protocol_schemas.append(equilibration)

        # Step 5: Custom production simulation with frequent density checks
        production = ProductionSimulation("production_simulation")
        production.input_coordinate_file = ProtocolPath(
            "output_coordinate_file", equilibration.id
        )
        production.input_force_field = ProtocolPath(
            "parameterized_system", assign_parameters.id
        )
        production.steps = 1000000  # Adjust as needed for production
        production.check_interval = 10000  # Check density every 10,000 steps
        schema.protocol_schemas.append(production)

        # Step 6: Frequent density checks
        for i in range(0, production.steps, production.check_interval):
            density_check = Density("density_check_{}".format(i))
            density_check.input_coordinate_file = ProtocolPath(
                "output_coordinate_file", equilibration.id
            )
            density_check.input_force_field = ProtocolPath(
                "parameterized_system", assign_parameters.id
            )
            density_check.steps = production.check_interval
            density_check.output_to_store = ProtocolPath(
                "output_density", production.id
            )
            schema.protocol_schemas.append(density_check)

        # Step 7: Final production run if density is converged
        final_production = ProductionSimulation("final_production_simulation")
        final_production.input_coordinate_file = ProtocolPath(
            "output_coordinate_file", equilibration.id
        )
        final_production.input_force_field = ProtocolPath(
            "parameterized_system", assign_parameters.id
        )
        final_production.steps = 2500000  # 5 ns run assuming 2 fs timestep
        final_production.conditional_group = "density_converged"
        schema.protocol_schemas.append(final_production)

        # Step 8: Analysis step to check if density has converged
        density_analysis = AnalyzeTrajectory("density_analysis")
        density_analysis.input_coordinate_file = ProtocolPath(
            "output_coordinate_file", production.id
        )
        density_analysis.input_trajectory_path = ProtocolPath(
            "output_trajectory_path", production.id
        )
        density_analysis.output_to_store = ProtocolPath(
            "output_density", production.id
        )
        density_analysis.conditional_group = "density_converged"
        schema.protocol_schemas.append(density_analysis)

        return schema

# Example usage
if __name__ == "__main__":
    schema = CustomDensityWorkflow.default_simulation_schema()
    workflow = Workflow()
    workflow.schema = schema

    # Assign global variables and execute workflow as needed.


ImportError: cannot import name 'ProtocolPath' from 'openff.evaluator.workflow' (/localhome/cschiebroek/MDFP_VP/mdfptools/carl/openff-evaluator/openff/evaluator/workflow/__init__.py)

In [None]:
default_schema = Density.default_simulation_schema()

# Initialize a new schema
schema = WorkflowSchema()
build_coordinates = default_schema.workflow_schema.protocol_schemas[0]


In [None]:
default_schema.workflow_schema.protocol_schemas[0].get_attributes()

['id', 'type', 'inputs']

In [None]:
import os
from rdkit import Chem
from rdkit.Chem import rdDistGeom
from openff.evaluator import unit
from openff.evaluator.client import EvaluatorClient, RequestOptions
from openff.evaluator.forcefield import ForceFieldSource
from openff.evaluator.workflow import Workflow
from openff.evaluator.workflow.utils import ProtocolPath
from openff.evaluator.workflow.schemas import WorkflowSchema
from openff.evaluator.protocols.utils import generate_simulation_protocols
from openff.evaluator.properties.density import Density
from openff.evaluator.substances import Substance, Component, MoleFraction
from openff.evaluator.thermodynamics import ThermodynamicState
from openff.toolkit.typing.engines.smirnoff import ForceField

from openff.evaluator.workflow.schemas import WorkflowSchema, ProtocolPath
from openff.evaluator.protocols.analysis import AverageObservable
from openff.evaluator.utils.observables import ObservableType, ObservableArray
from openff.units import unit as openff_unit

# Define the custom density workflow
class CustomDensityWorkflow(Density):
    @staticmethod
    def default_simulation_schema(replicas: int = 3) -> WorkflowSchema:
        # Create the analysis protocol

        # Create the analysis protocol
        analysis_protocol = AverageObservable("average_density")
        analysis_protocol.observable = ObservableArray([ObservableType.Density])

        
        # Generate the default simulation protocols
        protocols, _, _ = generate_simulation_protocols(
            analysis_protocol,
            use_target_uncertainty=True,
            id_suffix=""
        )

        schema = WorkflowSchema()
        schema.protocol_schemas.extend(protocols)

        # Custom production simulation with frequent density checks
        production_protocol = protocols.production_simulation
        production_protocol.steps_per_iteration = 10000  # Adjusted for shorter simulation time
        production_protocol.output_frequency = 1000  # Check density every 1,000 steps

        # Add frequent density checks
        for i in range(0, production_protocol.steps_per_iteration, production_protocol.output_frequency):
            density_check = Density("density_check_{}".format(i))
            density_check.input_coordinate_file = ProtocolPath(
                "output_coordinate_file", protocols.equilibration_simulation.id
            )
            density_check.input_force_field = ProtocolPath(
                "parameterized_system", protocols.assign_parameters.id
            )
            density_check.steps = production_protocol.output_frequency
            density_check.output_to_store = ProtocolPath(
                "output_density", production_protocol.id
            )
            schema.protocol_schemas.append(density_check)

        # Add a final production run if density is converged
        final_production = generate_simulation_protocols(
            analysis_protocol,
            use_target_uncertainty=False,
            id_suffix="final"
        ).production_simulation
        final_production.steps_per_iteration = 50000  # 0.1 ns run for testing
        final_production.conditional_group = "density_converged"
        schema.protocol_schemas.append(final_production)

        # Analysis step to check if density has converged
        density_analysis = Density("density_analysis")
        density_analysis.input_coordinate_file = ProtocolPath(
            "output_coordinate_file", production_protocol.id
        )
        density_analysis.input_trajectory_path = ProtocolPath(
            "output_trajectory_path", production_protocol.id
        )
        density_analysis.output_to_store = ProtocolPath(
            "output_density", production_protocol.id
        )
        density_analysis.conditional_group = "density_converged"
        schema.protocol_schemas.append(density_analysis)

        return schema


# Define the substance from rdkit
benzene_smiles = "c1ccccc1"
benzene_mol = Chem.AddHs(Chem.MolFromSmiles(benzene_smiles))
rdDistGeom.EmbedMolecule(benzene_mol)
smiles = Chem.MolToSmiles(benzene_mol)
benzene = Substance()
benzene.add_component(Component(smiles=smiles), MoleFraction(1.0))

# Define the thermodynamic state
thermodynamic_state = ThermodynamicState(
    temperature=298.15 * unit.kelvin,
    pressure=1.0 * unit.atmosphere
)

# Define the force field
forcefield = ForceField("openff_unconstrained-2.1.0.offxml")
forcefield_path = "openff_unconstrained-2.1.0.offxml"

# Define the workflow
schema = CustomDensityWorkflow.default_simulation_schema()
workflow = Workflow()
workflow.schema = schema

# Define evaluator client
client = EvaluatorClient()

# Define request options
options = RequestOptions()
options.calculation_layers = [workflow]

# Add global variables
workflow.schema.global_protocols["force_field_path"] = forcefield_path
workflow.schema.global_protocols["coordinate_file_path"] = "build_coordinates"

# Create and submit the estimation request
request = client.request_estimate(
    properties=[CustomDensityWorkflow()],
    substances=[benzene],
    thermodynamic_states=[thermodynamic_state],
    options=options,
)

# Wait for results
results = request.results()
print(results)


TypeError: The value must be a unit-wrapped integer, float or numpy array.

In [None]:
import os
from rdkit import Chem
from rdkit.Chem import rdDistGeom
from openff.evaluator import unit
from openff.evaluator.client import EvaluatorClient, RequestOptions
from openff.evaluator.forcefield import ForceFieldSource
from openff.evaluator.workflow import Workflow
from openff.evaluator.workflow.utils import ProtocolPath

from openff.evaluator.workflow.schemas import WorkflowSchema
from openff.evaluator.properties.density import Density
from openff.evaluator.protocols.coordinates import BuildCoordinatesPackmol
from openff.evaluator.protocols.forcefield import BuildSmirnoffSystem
from openff.evaluator.protocols.openmm import OpenMMSimulation, OpenMMEnergyMinimisation

from openff.evaluator.utils.observables import ObservableType, ObservableArray
from openff.evaluator.substances import Substance, Component, MoleFraction
from openff.evaluator.thermodynamics import ThermodynamicState
from openff.toolkit.typing.engines.smirnoff import ForceField

# Define the custom density workflow
class CustomDensityWorkflow(Density):
    @staticmethod
    def default_simulation_schema(replicas: int = 3) -> WorkflowSchema:
        schema = WorkflowSchema()

        # Step 1: Build coordinates
        build_coordinates = BuildCoordinatesPackmol("build_coordinates")
        build_coordinates.max_molecules = 1000
        build_coordinates.mass_density = 0.95 * unit.grams / unit.milliliters
        schema.protocol_schemas.append(build_coordinates)

        # Step 2: Assign parameters
        assign_parameters = BuildSmirnoffSystem("assign_parameters")
        assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
        assign_parameters.coordinate_file_path = ProtocolPath("coordinate_file_path", build_coordinates.id)
        schema.protocol_schemas.append(assign_parameters)

        # Step 3: Energy minimization
        energy_minimization = OpenMMEnergyMinimisation("energy_minimisation")
        energy_minimization.input_coordinate_file = ProtocolPath("coordinate_file_path", assign_parameters.id)
        energy_minimization.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
        schema.protocol_schemas.append(energy_minimization)

        # Step 4: Equilibration simulation
        equilibration_simulation = OpenMMSimulation("equilibration_simulation")
        equilibration_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", energy_minimization.id)
        equilibration_simulation.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
        equilibration_simulation.steps = 100000
        schema.protocol_schemas.append(equilibration_simulation)

        # Step 5: Production simulation with frequent checks
        production_simulation = OpenMMSimulation("production_simulation")
        production_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
        production_simulation.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
        production_simulation.steps = 10000  # Shorter simulation time for testing
        schema.protocol_schemas.append(production_simulation)

        # Step 6: Frequent density checks
        for i in range(0, production_simulation.steps, 1000):
            density_check = AverageObservable(f"density_check_{i}")
            density_check.observable = ObservableType.Density
            density_check.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
            density_check.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
            density_check.input_trajectory_path = ProtocolPath("trajectory_file_path", production_simulation.id)
            schema.protocol_schemas.append(density_check)

        # Step 7: Final production run after density convergence
        final_production = OpenMMSimulation("final_production_simulation")
        final_production.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
        final_production.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
        final_production.steps = 50000  # 0.1 ns run for testing
        final_production.conditional_group = "density_converged"
        schema.protocol_schemas.append(final_production)

        # Analysis step to check if density has converged
        density_analysis = AverageObservable("density_analysis")
        density_analysis.observable = ObservableType.Density
        density_analysis.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
        density_analysis.input_coordinate_file = ProtocolPath("output_coordinate_file", production_simulation.id)
        density_analysis.input_trajectory_path = ProtocolPath("trajectory_file_path", production_simulation.id)
        density_analysis.output_to_store = ProtocolPath("output_density", production_simulation.id)
        density_analysis.conditional_group = "density_converged"
        schema.protocol_schemas.append(density_analysis)

        return schema

# Define the substance from rdkit
benzene_smiles = "c1ccccc1"
benzene_mol = Chem.AddHs(Chem.MolFromSmiles(benzene_smiles))
rdDistGeom.EmbedMolecule(benzene_mol)
smiles = Chem.MolToSmiles(benzene_mol)
benzene = Substance()
benzene.add_component(Component(smiles=smiles), MoleFraction(1.0))

# Define the thermodynamic state
thermodynamic_state = ThermodynamicState(
    temperature=298.15 * unit.kelvin,
    pressure=1.0 * unit.atmosphere
)

# Define the force field
forcefield = ForceField("openff_unconstrained-2.1.0.offxml")
forcefield_path = "openff_unconstrained-2.1.0.offxml"

# Define the workflow
schema = CustomDensityWorkflow.default_simulation_schema()
workflow = Workflow()
workflow.schema = schema

# Define evaluator client
client = EvaluatorClient()

# Define request options
options = RequestOptions()
options.calculation_layers = [workflow]

# Add global variables
workflow.schema.global_protocols["force_field_path"] = forcefield_path
workflow.schema.global_protocols["coordinate_file_path"] = "build_coordinates"

# Create and submit the estimation request
request = client.request_estimate(
    properties=[CustomDensityWorkflow()],
    substances=[benzene],
    thermodynamic_states=[thermodynamic_state],
    options=options,
)

# Wait for results
results = request.results()
print(results)


TypeError: The value must be a unit-wrapped integer, float or numpy array.

In [None]:
schema = WorkflowSchema()

# Step 1: Build coordinates
build_coordinates = BuildCoordinatesPackmol("build_coordinates")
build_coordinates.max_molecules = 1000
build_coordinates.mass_density = 0.95 * unit.grams / unit.milliliters
# schema.protocol_schemas.append(build_coordinates)

In [None]:
schema.get_attributes()

['protocol_schemas',
 'protocol_replicators',
 'final_value_source',
 'outputs_to_store']

In [None]:
schema.protocol_schemas.append(build_coordinates)

In [None]:
import os
from rdkit import Chem
from rdkit.Chem import rdDistGeom
from openff.evaluator import unit
from openff.evaluator.client import EvaluatorClient, RequestOptions
from openff.evaluator.forcefield import ForceFieldSource
from openff.evaluator.workflow import Workflow
from openff.evaluator.workflow.utils import ProtocolPath

from openff.evaluator.workflow.schemas import WorkflowSchema
from openff.evaluator.properties.density import Density
from openff.evaluator.protocols.coordinates import BuildCoordinatesPackmol
from openff.evaluator.protocols.forcefield import BuildSmirnoffSystem
from openff.evaluator.protocols.openmm import OpenMMSimulation, OpenMMEnergyMinimisation

from openff.evaluator.utils.observables import ObservableType, ObservableArray
from openff.evaluator.substances import Substance, Component, MoleFraction
from openff.evaluator.thermodynamics import ThermodynamicState
from openff.toolkit.typing.engines.smirnoff import ForceField

# Define the custom density workflow
class CustomDensityWorkflow(Density):
    @staticmethod
    def default_simulation_schema(replicas: int = 3) -> WorkflowSchema:
        schema = WorkflowSchema()

        # Step 1: Build coordinates
        build_coordinates = BuildCoordinatesPackmol("build_coordinates")
        build_coordinates.max_molecules = 1000
        build_coordinates.mass_density = 0.95 * unit.grams / unit.milliliters
        schema.protocol_schemas.append(build_coordinates)

        # Step 2: Assign parameters
        assign_parameters = BuildSmirnoffSystem("assign_parameters")
        assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
        assign_parameters.coordinate_file_path = ProtocolPath("coordinate_file_path", build_coordinates.id)
        schema.protocol_schemas.append(assign_parameters)

        # Step 3: Energy minimization
        energy_minimization = OpenMMEnergyMinimisation("energy_minimisation")
        energy_minimization.input_coordinate_file = ProtocolPath("coordinate_file_path", assign_parameters.id)
        energy_minimization.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
        schema.protocol_schemas.append(energy_minimization)

        # Step 4: Equilibration simulation
        equilibration_simulation = OpenMMSimulation("equilibration_simulation")
        equilibration_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", energy_minimization.id)
        equilibration_simulation.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
        equilibration_simulation.steps = 100000
        schema.protocol_schemas.append(equilibration_simulation)

        return schema

def run_short_simulation(client, workflow, benzene, thermodynamic_state, forcefield_path, steps):
    # Define the simulation protocol
    production_simulation = OpenMMSimulation("short_production_simulation")
    production_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", "energy_minimisation")
    production_simulation.input_force_field = ProtocolPath("parameterized_system", "assign_parameters")
    production_simulation.steps = steps

    workflow.schema.protocol_schemas.append(production_simulation)
    
    # Add global variables
    workflow.schema.global_protocols["force_field_path"] = forcefield_path
    workflow.schema.global_protocols["coordinate_file_path"] = "build_coordinates"

    # Create and submit the estimation request
    request = client.request_estimate(
        properties=[workflow],
        substances=[benzene],
        thermodynamic_states=[thermodynamic_state],
        options=RequestOptions()
    )
    
    # Wait for results
    results = request.results()
    
    return results[0].value

# Define the substance from rdkit
benzene_smiles = "c1ccccc1"
benzene_mol = Chem.AddHs(Chem.MolFromSmiles(benzene_smiles))
rdDistGeom.EmbedMolecule(benzene_mol)
smiles = Chem.MolToSmiles(benzene_mol)
benzene = Substance()
benzene.add_component(Component(smiles=smiles), MoleFraction(1.0))

# Define the thermodynamic state
thermodynamic_state = ThermodynamicState(
    temperature=298.15 * unit.kelvin,
    pressure=1.0 * unit.atmosphere
)

# Define the force field
forcefield = ForceField("openff_unconstrained-2.1.0.offxml")
forcefield_path = "openff_unconstrained-2.1.0.offxml"

# Define the evaluator client
client = EvaluatorClient()

# Initial workflow
schema = CustomDensityWorkflow.default_simulation_schema()
workflow = Workflow()
workflow.schema = schema

# Run short simulations until convergence
convergence_tolerance = 0.01  # g/mL
max_iterations = 10
steps = 10000  # Adjusted for shorter simulation time

previous_density = None
for iteration in range(max_iterations):
    current_density = run_short_simulation(client, workflow, benzene, thermodynamic_state, forcefield_path, steps)
    if previous_density is not None:
        if abs(current_density - previous_density) < convergence_tolerance:
            print(f"Density has converged: {current_density} g/mL")
            break
    previous_density = current_density
    print(f"Iteration {iteration + 1}: Current density = {current_density} g/mL")

# Run final production simulation if converged
if abs(current_density - previous_density) < convergence_tolerance:
    # Final production run
    final_production_simulation = OpenMMSimulation("final_production_simulation")
    final_production_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", "equilibration_simulation")
    final_production_simulation.input_force_field = ProtocolPath("parameterized_system", "assign_parameters")
    final_production_simulation.steps = 50000  # 0.1 ns run for testing

    workflow.schema.protocol_schemas.append(final_production_simulation)

    # Create and submit the estimation request for final production run
    request = client.request_estimate(
        properties=[workflow],
        substances=[benzene],
        thermodynamic_states=[thermodynamic_state],
        options=RequestOptions()
    )

    # Wait for results
    results = request.results()
    final_density = results[0].value
    print(f"Final density after production run: {final_density} g/mL")
else:
    print("Density did not converge within the maximum number of iterations.")


TypeError: Workflow.__init__() missing 1 required positional argument: 'global_metadata'

In [None]:

class CustomDensityWorkflow(Density):
    @staticmethod
    def default_simulation_schema(replicas: int = 3) -> WorkflowSchema:
        schema = WorkflowSchema()

        # Step 1: Build coordinates
        build_coordinates = BuildCoordinatesPackmol("build_coordinates")
        build_coordinates.max_molecules = 1000
        build_coordinates.mass_density = 0.95 * unit.grams / unit.milliliters
        schema.protocol_schemas.append(build_coordinates)

        # Step 2: Assign parameters
        assign_parameters = BuildSmirnoffSystem("assign_parameters")
        assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
        assign_parameters.coordinate_file_path = ProtocolPath("coordinate_file_path", build_coordinates.id)
        schema.protocol_schemas.append(assign_parameters)

        # Step 3: Energy minimization
        energy_minimization = OpenMMEnergyMinimisation("energy_minimisation")
        energy_minimization.input_coordinate_file = ProtocolPath("coordinate_file_path", assign_parameters.id)
        energy_minimization.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
        schema.protocol_schemas.append(energy_minimization)

        # Step 4: Equilibration simulation
        equilibration_simulation = OpenMMSimulation("equilibration_simulation")
        equilibration_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", energy_minimization.id)
        equilibration_simulation.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
        equilibration_simulation.steps = 100000
        schema.protocol_schemas.append(equilibration_simulation)

        return schema

def run_short_simulation(client, workflow, benzene, thermodynamic_state, forcefield_path, steps):
    # Define the simulation protocol
    production_simulation = OpenMMSimulation("short_production_simulation")
    production_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", "energy_minimisation")
    production_simulation.input_force_field = ProtocolPath("parameterized_system", "assign_parameters")
    production_simulation.steps = steps

    workflow.schema.protocol_schemas.append(production_simulation)
    
    # Add global variables
    workflow.schema.global_protocols["force_field_path"] = forcefield_path
    workflow.schema.global_protocols["coordinate_file_path"] = "build_coordinates"

    # Create and submit the estimation request
    request = client.request_estimate(
        properties=[workflow],
        substances=[benzene],
        thermodynamic_states=[thermodynamic_state],
        options=RequestOptions()
    )
    
    # Wait for results
    results = request.results()
    
    return results[0].value

# Define the substance from rdkit
benzene_smiles = "c1ccccc1"
benzene_mol = Chem.AddHs(Chem.MolFromSmiles(benzene_smiles))
rdDistGeom.EmbedMolecule(benzene_mol)
smiles = Chem.MolToSmiles(benzene_mol)
benzene = Substance()
benzene.add_component(Component(smiles=smiles), MoleFraction(1.0))

# Define the thermodynamic state
thermodynamic_state = ThermodynamicState(
    temperature=298.15 * unit.kelvin,
    pressure=1.0 * unit.atmosphere
)

# Define the force field
forcefield = ForceField("openff_unconstrained-2.1.0.offxml")
forcefield_path = "openff_unconstrained-2.1.0.offxml"

# Define the evaluator client
client = EvaluatorClient()
# Initial workflow
target_uncertainty = 0.1 * unit.grams / unit.milliliters
schema = CustomDensityWorkflow.default_simulation_schema()
global_metadata = {
            "thermodynamic_state": Density.thermodynamic_state,
            # "substance": Density.substance,
            # "components": benzene,
            # "target_uncertainty": target_uncertainty,
            # "force_field_path": forcefield_path,
        }
workflow = Workflow.from_schema(schema, metadata=global_metadata)

# Run short simulations until convergence
convergence_tolerance = 0.01  # g/mL
max_iterations = 10
steps = 10000  # Adjusted for shorter simulation time

previous_density = None
for iteration in range(max_iterations):
    current_density = run_short_simulation(client, workflow, benzene, thermodynamic_state, forcefield_path, steps)
    if previous_density is not None:
        if abs(current_density - previous_density) < convergence_tolerance:
            print(f"Density has converged: {current_density} g/mL")
            break
    previous_density = current_density
    print(f"Iteration {iteration + 1}: Current density = {current_density} g/mL")

# Run final production simulation if converged
if abs(current_density - previous_density) < convergence_tolerance:
    # Final production run
    final_production_simulation = OpenMMSimulation("final_production_simulation")
    final_production_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", "equilibration_simulation")
    final_production_simulation.input_force_field = ProtocolPath("parameterized_system", "assign_parameters")
    final_production_simulation.steps = 50000  # 0.1 ns run for testing

    workflow.schema.protocol_schemas.append(final_production_simulation)

    # Create and submit the estimation request for final production run
    request = client.request_estimate(
        properties=[workflow],
        substances=[benzene],
        thermodynamic_states=[thermodynamic_state],
        options=RequestOptions()
    )

    # Wait for results
    results = request.results()
    final_density = results[0].value
    print(f"Final density after production run: {final_density} g/mL")
else:
    print("Density did not converge within the maximum number of iterations.")

ImportError: cannot import name 'ProtocolPath' from 'openff.evaluator.workflow' (/localhome/cschiebroek/MDFP_VP/mdfptools/carl/openff-evaluator/openff/evaluator/workflow/__init__.py)

In [None]:


class CustomDensityWorkflow(Density):
    @staticmethod
    def default_simulation_schema(replicas: int = 3) -> WorkflowSchema:
        schema = WorkflowSchema()

        # Step 1: Build coordinates
        build_coordinates = BuildCoordinatesPackmol("build_coordinates")
        build_coordinates.max_molecules = 1000
        build_coordinates.mass_density = 0.95 * unit.grams / unit.milliliters
        schema.protocol_schemas.append(build_coordinates.schema)

        # Step 2: Assign parameters
        assign_parameters = BuildSmirnoffSystem("assign_parameters")
        assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
        assign_parameters.coordinate_file_path = ProtocolPath("coordinate_file_path", build_coordinates.id)
        schema.protocol_schemas.append(assign_parameters.schema)

        # Step 3: Energy minimization
        energy_minimization = OpenMMEnergyMinimisation("energy_minimisation")
        energy_minimization.input_coordinate_file = ProtocolPath("coordinate_file_path", assign_parameters.id)
        energy_minimization.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
        schema.protocol_schemas.append(energy_minimization.schema)

        # Step 4: Equilibration simulation
        equilibration_simulation = OpenMMSimulation("equilibration_simulation")
        equilibration_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", energy_minimization.id)
        equilibration_simulation.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
        equilibration_simulation.steps = 100000
        schema.protocol_schemas.append(equilibration_simulation.schema)

        #add global variables
        schema.metadata["force_field_path"] = forcefield_path

        return schema

def run_short_simulation(client, schema, benzene, thermodynamic_state, forcefield_path, steps):
    # Define the simulation protocol
    production_simulation = OpenMMSimulation("short_production_simulation")
    production_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", "energy_minimisation")
    production_simulation.input_force_field = ProtocolPath("parameterized_system", "assign_parameters")
    production_simulation.steps = steps

    schema.protocol_schemas.append(production_simulation.schema)
    
    # Create the workflow from schema
    workflow = Workflow.from_schema(schema, metadata=None)

    # Create and submit the estimation request
    request = client.request_estimate(
        properties=[workflow],
        substances=[benzene],
        thermodynamic_states=[thermodynamic_state],
        options=RequestOptions()
    )
    
    # Wait for results
    results = request.results()
    
    return results[0].value

# Define the substance from rdkit
benzene_smiles = "c1ccccc1"
benzene_mol = Chem.AddHs(Chem.MolFromSmiles(benzene_smiles))
rdDistGeom.EmbedMolecule(benzene_mol)
smiles = Chem.MolToSmiles(benzene_mol)
benzene = Substance()
benzene.add_component(Component(smiles=smiles), MoleFraction(1.0))

# Define the thermodynamic state
thermodynamic_state = ThermodynamicState(
    temperature=298.15 * unit.kelvin,
    pressure=1.0 * unit.atmosphere
)

# Define the force field
forcefield = ForceField("openff_unconstrained-2.1.0.offxml")
forcefield_path = "openff_unconstrained-2.1.0.offxml"

# Define the evaluator client
client = EvaluatorClient()
# Initial workflow
target_uncertainty = 0.1 * unit.grams / unit.milliliters
schema = CustomDensityWorkflow.default_simulation_schema()
global_metadata = {
            "substance": Density.substance,
        }

# Run short simulations until convergence
convergence_tolerance = 0.01  # g/mL
max_iterations = 10
steps = 10000  # Adjusted for shorter simulation time
schema.metadata = global_metadata
previous_density = None
for iteration in range(max_iterations):
    current_density = run_short_simulation(client, schema, benzene, thermodynamic_state, forcefield_path, steps)
    if previous_density is not None:
        if abs(current_density - previous_density) < convergence_tolerance:
            print(f"Density has converged: {current_density} g/mL")
            break
    previous_density = current_density
    print(f"Iteration {iteration + 1}: Current density = {current_density} g/mL")

# Run final production simulation if converged
if abs(current_density - previous_density) < convergence_tolerance:
    # Final production run
    final_production_simulation = OpenMMSimulation("final_production_simulation")
    final_production_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", "equilibration_simulation")
    final_production_simulation.input_force_field = ProtocolPath("parameterized_system", "assign_parameters")
    final_production_simulation.steps = 50000  # 0.1 ns run for testing

    schema.protocol_schemas.append(final_production_simulation.schema)

    # Create the workflow from schema
    workflow = Workflow.from_schema(schema, metadata=global_metadata)

    # Create and submit the estimation request for final production run
    request = client.request_estimate(
        properties=[workflow],
        substances=[benzene],
        thermodynamic_states=[thermodynamic_state],
        options=RequestOptions()
    )

    # Wait for results
    results = request.results()
    final_density = results[0].value
    print(f"Final density after production run: {final_density} g/mL")
else:
    print("Density did not converge within the maximum number of iterations.")


AttributeError: 'WorkflowSchema' object has no attribute 'metadata'

In [1]:
import os
from rdkit import Chem
from rdkit.Chem import rdDistGeom
from openff.evaluator import unit
from openff.evaluator.client import EvaluatorClient, RequestOptions
from openff.evaluator.forcefield import ForceFieldSource
from openff.evaluator.workflow import Workflow
from openff.evaluator.workflow.utils import ProtocolPath

from openff.evaluator.workflow.schemas import WorkflowSchema
from openff.evaluator.properties.density import Density
from openff.evaluator.protocols.coordinates import BuildCoordinatesPackmol
from openff.evaluator.protocols.forcefield import BuildSmirnoffSystem
from openff.evaluator.protocols.openmm import OpenMMSimulation, OpenMMEnergyMinimisation

from openff.evaluator.utils.observables import ObservableType, ObservableArray
from openff.evaluator.substances import Substance, Component, MoleFraction
from openff.evaluator.thermodynamics import ThermodynamicState
from openff.toolkit.typing.engines.smirnoff import ForceField



****** PyMBAR will use 64-bit JAX! *******
* JAX is currently set to 32-bit bitsize *
* which is its default.                  *
*                                        *
* PyMBAR requires 64-bit mode and WILL   *
* enable JAX's 64-bit mode when called.  *
*                                        *
* This MAY cause problems with other     *
* Uses of JAX in the same code.          *
******************************************



In [4]:
benzene_smiles = "c1ccccc1"
benzene_mol = Chem.AddHs(Chem.MolFromSmiles(benzene_smiles))
rdDistGeom.EmbedMolecule(benzene_mol)
smiles = Chem.MolToSmiles(benzene_mol)
substance = Substance()
substance.add_component(Component(smiles=smiles), MoleFraction(1.0))
metadata = {"substance": substance}

# Step 1: Build coordinates
build_coordinates = BuildCoordinatesPackmol("build_coordinates")
build_coordinates.allow_merging = True
build_coordinates.count_exact_amount = True
build_coordinates.mass_density = 0.95 * unit.grams / unit.milliliters
build_coordinates.box_aspect_ratio = [1.0, 1.0, 1.0]
build_coordinates.substance = ProtocolPath("substance", "global")
build_coordinates.tolerance = 2.0 * unit.angstrom
build_coordinates.verbose_packmol = False
build_coordinates.retain_packmol_files = False


# Step 2: Assign parameters
assign_parameters = BuildSmirnoffSystem("assign_parameters")
assign_parameters.allow_merging = True
assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
assign_parameters.coordinate_file_path = ProtocolPath("coordinate_file_path", build_coordinates.id)
assign_parameters.substance = ProtocolPath("substance", "global")

# Step 3: Energy minimization
energy_minimization = OpenMMEnergyMinimisation("energy_minimisation")
energy_minimization.allow_merging = True
energy_minimization.input_coordinate_file = ProtocolPath("coordinate_file_path", build_coordinates.id)
energy_minimization.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)
energy_minimization.tolerance = 10.0 * unit.kilojoules_per_mole / unit.nanometer
energy_minimization.enable_pbc = True
energy_minimization.max_iterations = 0

# Step 4: Equilibration simulation
equilibration_simulation = OpenMMSimulation("equilibration_simulation")
equilibration_simulation.allow_merging = True
equilibration_simulation.steps_per_iteration = 100000
equilibration_simulation.total_number_of_iterations = 1
equilibration_simulation.output_frequency = 5000
equilibration_simulation.checkpoint_frequency = 10
equilibration_simulation.timestep = 2.0 * unit.femtosecond
equilibration_simulation.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
equilibration_simulation.thermostat_friction = 1.0 / unit.picosecond
equilibration_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", energy_minimization.id)
equilibration_simulation.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)
equilibration_simulation.enable_pbc = True
equilibration_simulation.allow_gpu_platforms = True
equilibration_simulation.high_precision = False

schema = WorkflowSchema()
schema.protocol_schemas = [build_coordinates.schema, assign_parameters.schema, energy_minimization.schema, equilibration_simulation.schema]

metadata = {
    "substance": substance,
    "thermodynamic_state": ThermodynamicState(
        temperature=298.15 * unit.kelvin,
        pressure=1.0 * unit.atmosphere
    ),
    "force_field_path": "openff_unconstrained-2.1.0.offxml"
}
# Create the workflow from schema
workflow = Workflow.from_schema(schema, metadata=metadata)

In [None]:
#now run production simulation for 5ns
production_simulation = OpenMMSimulation("production_simulation")
production_simulation.allow_merging = True
production_simulation.steps_per_iteration = 100000
production_simulation.total_number_of_iterations = 50
production_simulation.output_frequency = 5000
production_simulation.checkpoint_frequency = 10
production_simulation.timestep = 2.0 * unit.femtosecond
production_simulation.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
production_simulation.thermostat_friction = 1.0 / unit.picosecond
production_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
production_simulation.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)
production_simulation.enable_pbc = True
production_simulation.allow_gpu_platforms = True
production_simulation.high_precision = False
schema.protocol_schemas.append(production_simulation.schema)

In [5]:
results = workflow.execute()

In [22]:
density_workflow = Density.default_simulation_schema()
density_workflow.workflow_schema.protocol_schemas[5].inputs

{'.allow_merging': True,
 '.time_series_statistics': <ProtocolPath full_path=conditional_group/average_density.time_series_statistics>,
 '.input_coordinate_file': <ProtocolPath full_path=conditional_group/production_simulation.output_coordinate_file>,
 '.input_trajectory_path': <ProtocolPath full_path=conditional_group/production_simulation.trajectory_file_path>}

In [None]:
# Step 6: Analysis step to check if density has converged
density_analysis = AverageObservable("density_analysis")
density_analysis.observable = ObservableType.Density
density_analysis.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
density_analysis.input_coordinate_file = ProtocolPath("output_coordinate_file", production_simulation.id)
density_analysis.input_trajectory_path = ProtocolPath("trajectory_file_path", production_simulation.id)
density_analysis.output_to_store = ProtocolPath("output_density", production_simulation.id)
density_analysis.conditional_group = "density_converged"
schema.protocol_schemas.append(density_analysis)

# Step 7: Final production run after density convergence
final_production = OpenMMSimulation("final_production_simulation")
final_production.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
final_production.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
final_production.steps = 50000  # 0.1 ns run for testing
final_production.conditional_group = "density_converged"
schema.protocol_schemas.append(final_production)

# Create the workflow from schema
workflow = Workflow.from_schema(schema, metadata=metadata)




In [40]:
from openff.evaluator.protocols.analysis import AverageObservable
from openff.evaluator.utils.observables import ObservableType, ObservableArray
from  openff.evaluator.protocols.groups import ConditionalGroup

In [38]:
# Step 1: Convert SMILES to a molecule and add hydrogens
benzene_smiles = "c1ccccc1"
benzene_mol = Chem.AddHs(Chem.MolFromSmiles(benzene_smiles))
rdDistGeom.EmbedMolecule(benzene_mol)

# Convert molecule to SMILES again (to ensure it's correctly hydrogenated)
smiles = Chem.MolToSmiles(benzene_mol)
substance = Substance()
substance.add_component(Component(smiles=smiles), MoleFraction(1.0))

# Step 2: Build coordinates
build_coordinates = BuildCoordinatesPackmol("build_coordinates")
build_coordinates.allow_merging = True
build_coordinates.count_exact_amount = True
build_coordinates.mass_density = 0.95 * unit.grams / unit.milliliters
build_coordinates.box_aspect_ratio = [1.0, 1.0, 1.0]
build_coordinates.substance = ProtocolPath("substance", "global")
build_coordinates.tolerance = 2.0 * unit.angstrom
build_coordinates.verbose_packmol = False
build_coordinates.retain_packmol_files = False

# Step 3: Assign parameters
assign_parameters = BuildSmirnoffSystem("assign_parameters")
assign_parameters.allow_merging = True
assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
assign_parameters.coordinate_file_path = ProtocolPath("coordinate_file_path", build_coordinates.id)
assign_parameters.substance = ProtocolPath("substance", "global")

# Step 4: Energy minimization
energy_minimization = OpenMMEnergyMinimisation("energy_minimisation")
energy_minimization.allow_merging = True
energy_minimization.input_coordinate_file = ProtocolPath("coordinate_file_path", build_coordinates.id)
energy_minimization.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)
energy_minimization.tolerance = 10.0 * unit.kilojoules_per_mole / unit.nanometer
energy_minimization.enable_pbc = True
energy_minimization.max_iterations = 0

# Step 5: Equilibration simulation
equilibration_simulation = OpenMMSimulation("equilibration_simulation")
equilibration_simulation.allow_merging = True
equilibration_simulation.steps_per_iteration = 100000
equilibration_simulation.total_number_of_iterations = 1
equilibration_simulation.output_frequency = 5000
equilibration_simulation.checkpoint_frequency = 10
equilibration_simulation.timestep = 2.0 * unit.femtosecond
equilibration_simulation.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
equilibration_simulation.thermostat_friction = 1.0 / unit.picosecond
equilibration_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", energy_minimization.id)
equilibration_simulation.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)
equilibration_simulation.enable_pbc = True
equilibration_simulation.allow_gpu_platforms = True
equilibration_simulation.high_precision = False


# Step 6: Analysis step to check if density has converged
density_analysis = AverageObservable("density_analysis")
density_analysis.observable = ObservableType.Density
density_analysis.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
density_analysis.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
density_analysis.input_trajectory_path = ProtocolPath("trajectory_file_path", equilibration_simulation.id)
density_analysis.output_observable = "density"

# Step 7: Define a conditional group for final production simulation
final_production = OpenMMSimulation("final_production_simulation")
final_production.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
final_production.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
final_production.steps = 50000  # 0.1 ns run for testing

conditional_group = ConditionalGroup("conditional_group")
conditional_group.add_protocols(final_production, density_analysis)

# Set up a condition which will check if the density uncertainty is less than some threshold.
condition = ConditionalGroup.Condition()
condition.condition_type = ConditionalGroup.Condition.Type.LessThan
condition.right_hand_value = 0.5 * unit.grams / unit.milliliter
condition.left_hand_value = ProtocolPath("value.error", conditional_group.id, density_analysis.id)

# Add the condition.
conditional_group.add_condition(condition)

# Step 8: Define schema and metadata
schema = WorkflowSchema()
schema.protocol_schemas = [
    build_coordinates.schema,
    assign_parameters.schema,
    energy_minimization.schema,
    equilibration_simulation.schema,
    density_analysis.schema,
    conditional_group.schema
]

metadata = {
    "substance": substance,
    "thermodynamic_state": ThermodynamicState(
        temperature=298.15 * unit.kelvin,
        pressure=1.0 * unit.atmosphere
    ),
    "force_field_path": "openff_unconstrained-2.1.0.offxml"
}

# Step 9: Create the workflow from schema
workflow = Workflow.from_schema(schema, metadata=metadata)

# Step 10: Execute the workflow
workflow.execute()


ValueError: The observable attribute can only accept values of type <class 'openff.evaluator.utils.observables.ObservableArray'>

In [93]:


# Step 1: Convert SMILES to a molecule and add hydrogens
benzene_smiles = "c1ccccc1"
benzene_mol = Chem.AddHs(Chem.MolFromSmiles(benzene_smiles))
rdDistGeom.EmbedMolecule(benzene_mol)

# Convert molecule to SMILES again (to ensure it's correctly hydrogenated)
smiles = Chem.MolToSmiles(benzene_mol)
substance = Substance()
substance.add_component(Component(smiles=smiles), MoleFraction(1.0))

# Step 2: Build coordinates
build_coordinates = BuildCoordinatesPackmol("build_coordinates")
build_coordinates.allow_merging = True
build_coordinates.count_exact_amount = True
build_coordinates.mass_density = 0.95 * unit.grams / unit.milliliters
build_coordinates.box_aspect_ratio = [1.0, 1.0, 1.0]
build_coordinates.substance = ProtocolPath("substance", "global")
build_coordinates.tolerance = 2.0 * unit.angstrom
build_coordinates.verbose_packmol = False
build_coordinates.retain_packmol_files = False

# Step 3: Assign parameters
assign_parameters = BuildSmirnoffSystem("assign_parameters")
assign_parameters.allow_merging = True
assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
assign_parameters.coordinate_file_path = ProtocolPath("coordinate_file_path", build_coordinates.id)
assign_parameters.substance = ProtocolPath("substance", "global")

# Step 4: Energy minimization
energy_minimization = OpenMMEnergyMinimisation("energy_minimisation")
energy_minimization.allow_merging = True
energy_minimization.input_coordinate_file = ProtocolPath("coordinate_file_path", build_coordinates.id)
energy_minimization.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)
energy_minimization.tolerance = 10.0 * unit.kilojoules_per_mole / unit.nanometer
energy_minimization.enable_pbc = True
energy_minimization.max_iterations = 0

# Step 5: Equilibration simulation
equilibration_simulation = OpenMMSimulation("equilibration_simulation")
equilibration_simulation.allow_merging = True
equilibration_simulation.steps_per_iteration = 100000
equilibration_simulation.total_number_of_iterations = 1
equilibration_simulation.output_frequency = 5000
equilibration_simulation.checkpoint_frequency = 10
equilibration_simulation.timestep = 2.0 * unit.femtosecond
equilibration_simulation.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
equilibration_simulation.thermostat_friction = 1.0 / unit.picosecond
equilibration_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", energy_minimization.id)
equilibration_simulation.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)
equilibration_simulation.enable_pbc = True
equilibration_simulation.allow_gpu_platforms = True
equilibration_simulation.high_precision = False

# Step 6: Analysis step to check if density fluctuation has converged
density_analysis = AverageObservable("density_analysis")
density_analysis.observable = ProtocolPath("observables.Density", equilibration_simulation.id)
density_analysis.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
density_analysis.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
density_analysis.input_trajectory_path = ProtocolPath("trajectory_file_path", equilibration_simulation.id)
density_analysis.output_observable = "density_fluctuation"

# Step 7: Define a conditional group for final production simulation
final_production = OpenMMSimulation("final_production_simulation")
final_production.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
final_production.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
final_production.steps = 50000  # 0.1 ns run for testing
final_production.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
final_production.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)

conditional_group = ConditionalGroup("conditional_group")
conditional_group.add_protocols(final_production, density_analysis)

# Set up a condition which will check if the density fluctuation is less than some threshold.
condition = ConditionalGroup.Condition()
condition.condition_type = ConditionalGroup.Condition.Type.LessThan
condition.right_hand_value = 0.5 * unit.grams / unit.milliliter  # Threshold for fluctuation
condition.left_hand_value = ProtocolPath("value.std_error", density_analysis.id)

# Add the condition.
conditional_group.add_condition(condition)

# Step 8: Define schema and metadata
schema = WorkflowSchema()
schema.protocol_schemas = [
    build_coordinates.schema,
    assign_parameters.schema,
    energy_minimization.schema,
    equilibration_simulation.schema,
    conditional_group.schema
]

metadata = {
    "substance": substance,
    "thermodynamic_state": ThermodynamicState(
        temperature=298.15 * unit.kelvin,
        pressure=1.0 * unit.atmosphere
    ),
    "force_field_path": "openff_unconstrained-2.1.0.offxml"
}

# Step 9: Create the workflow from schema
workflow = Workflow.from_schema(schema, metadata=metadata)

# Step 10: Execute the workflow
workflow.execute()


ValueError: The conditional_group protocol tries to take input from a non-existent protocol: density_analysis.value.std_error

In [82]:
density_analysis.get_attributes()

['id',
 'allow_merging',
 'bootstrap_iterations',
 'bootstrap_sample_size',
 'thermodynamic_state',
 'potential_energies',
 'value',
 'time_series_statistics',
 'observable',
 'divisor']

In [87]:
density_analysis.time_series_statistics

<openff.evaluator.attributes.attributes.UndefinedAttribute at 0x7d42d9d732d0>

In [58]:
#ValueError: Several protocols in the schema have the same id: ['density_analysis']. This is currently unsupported due to issues with merging two graphs which contain duplicate ids. print out all ids
for protocol in schema.protocol_schemas:
    print(protocol.id)

build_coordinates
assign_parameters
energy_minimisation
equilibration_simulation
density_analysis
conditional_group


In [66]:
schema = WorkflowSchema()
schema.protocol_schemas = [
    build_coordinates.schema,
    assign_parameters.schema,
    energy_minimization.schema,
    equilibration_simulation.schema,
    density_analysis.schema,
    conditional_group.schema
]
for protocol in schema.protocol_schemas:
    print(protocol.id)

build_coordinates
assign_parameters
energy_minimisation
equilibration_simulation
density_analysis
conditional_group


In [70]:
schema.protocol_schemas[-1].protocol_schemas

{'final_production_simulation': <openff.evaluator.workflow.schemas.ProtocolSchema at 0x7d42d9fc7fd0>,
 'density_analysis': <openff.evaluator.workflow.schemas.ProtocolSchema at 0x7d430cf53850>}

In [36]:
equilibration_simulation.observables["Density"]

TypeError: 'UndefinedAttribute' object is not subscriptable

In [97]:
# Step 1: Convert SMILES to a molecule and add hydrogens
benzene_smiles = "c1ccccc1"
benzene_mol = Chem.AddHs(Chem.MolFromSmiles(benzene_smiles))
rdDistGeom.EmbedMolecule(benzene_mol)

# Convert molecule to SMILES again (to ensure it's correctly hydrogenated)
smiles = Chem.MolToSmiles(benzene_mol)
substance = Substance()
substance.add_component(Component(smiles=smiles), MoleFraction(1.0))

# Step 2: Build coordinates
build_coordinates = BuildCoordinatesPackmol("build_coordinates")
build_coordinates.allow_merging = True
build_coordinates.count_exact_amount = True
build_coordinates.mass_density = 0.95 * unit.grams / unit.milliliters
build_coordinates.box_aspect_ratio = [1.0, 1.0, 1.0]
build_coordinates.substance = ProtocolPath("substance", "global")
build_coordinates.tolerance = 2.0 * unit.angstrom
build_coordinates.verbose_packmol = False
build_coordinates.retain_packmol_files = False

# Step 3: Assign parameters
assign_parameters = BuildSmirnoffSystem("assign_parameters")
assign_parameters.allow_merging = True
assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
assign_parameters.coordinate_file_path = ProtocolPath("coordinate_file_path", build_coordinates.id)
assign_parameters.substance = ProtocolPath("substance", "global")

# Step 4: Energy minimization
energy_minimization = OpenMMEnergyMinimisation("energy_minimisation")
energy_minimization.allow_merging = True
energy_minimization.input_coordinate_file = ProtocolPath("coordinate_file_path", build_coordinates.id)
energy_minimization.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)
energy_minimization.tolerance = 10.0 * unit.kilojoules_per_mole / unit.nanometer
energy_minimization.enable_pbc = True
energy_minimization.max_iterations = 0

# Step 5: Equilibration simulation
equilibration_simulation = OpenMMSimulation("equilibration_simulation")
equilibration_simulation.allow_merging = True
equilibration_simulation.steps_per_iteration = 100000
equilibration_simulation.total_number_of_iterations = 1
equilibration_simulation.output_frequency = 5000
equilibration_simulation.checkpoint_frequency = 10
equilibration_simulation.timestep = 2.0 * unit.femtosecond
equilibration_simulation.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
equilibration_simulation.thermostat_friction = 1.0 / unit.picosecond
equilibration_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", energy_minimization.id)
equilibration_simulation.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)
equilibration_simulation.enable_pbc = True
equilibration_simulation.allow_gpu_platforms = True
equilibration_simulation.high_precision = False

# Step 6: Analysis step to check if density has converged
density_analysis = AverageObservable("density_analysis")
density_analysis.observable = ProtocolPath("observables.Density", equilibration_simulation.id)
density_analysis.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
density_analysis.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
density_analysis.input_trajectory_path = ProtocolPath("trajectory_file_path", equilibration_simulation.id)
density_analysis.output_observable = "density"

# Step 7: Define a conditional group for final production simulation
final_production = OpenMMSimulation("final_production_simulation")
final_production.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
final_production.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
final_production.steps = 50000  # 0.1 ns run for testing
final_production.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
final_production.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)

# Add a unique identifier to the conditional group and its protocols
conditional_group = ConditionalGroup("conditional_group")
conditional_group.add_protocols(final_production, density_analysis)

# Set up a condition which will check if the density uncertainty is less than some threshold.
condition = ConditionalGroup.Condition()
condition.condition_type = ConditionalGroup.Condition.Type.LessThan
condition.right_hand_value = 0.5 * unit.grams / unit.milliliter
condition.left_hand_value = ProtocolPath("value.error", density_analysis.id)

# Add the condition.
conditional_group.add_condition(condition)

# Step 8: Define schema and metadata
schema = WorkflowSchema()
schema.protocol_schemas = [
    build_coordinates.schema,
    assign_parameters.schema,
    energy_minimization.schema,
    equilibration_simulation.schema,
    conditional_group.schema
]

metadata = {
    "substance": substance,
    "thermodynamic_state": ThermodynamicState(
        temperature=298.15 * unit.kelvin,
        pressure=1.0 * unit.atmosphere
    ),
    "force_field_path": "openff_unconstrained-2.1.0.offxml"
}

# Step 9: Create the workflow from schema
workflow = Workflow.from_schema(schema, metadata=metadata)

# # Step 10: Execute the workflow
# workflow.execute()

ValueError: The conditional_group protocol tries to take input from a non-existent protocol: density_analysis.value.error

In [101]:

# Step 1: Convert SMILES to a molecule and add hydrogens
benzene_smiles = "c1ccccc1"
benzene_mol = Chem.AddHs(Chem.MolFromSmiles(benzene_smiles))
rdDistGeom.EmbedMolecule(benzene_mol)

# Convert molecule to SMILES again (to ensure it's correctly hydrogenated)
smiles = Chem.MolToSmiles(benzene_mol)
substance = Substance()
substance.add_component(Component(smiles=smiles), MoleFraction(1.0))

# Step 2: Build coordinates
build_coordinates = BuildCoordinatesPackmol("build_coordinates")
build_coordinates.allow_merging = True
build_coordinates.count_exact_amount = True
build_coordinates.mass_density = 0.95 * unit.grams / unit.milliliters
build_coordinates.box_aspect_ratio = [1.0, 1.0, 1.0]
build_coordinates.substance = ProtocolPath("substance", "global")
build_coordinates.tolerance = 2.0 * unit.angstrom
build_coordinates.verbose_packmol = False
build_coordinates.retain_packmol_files = False

# Step 3: Assign parameters
assign_parameters = BuildSmirnoffSystem("assign_parameters")
assign_parameters.allow_merging = True
assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
assign_parameters.coordinate_file_path = ProtocolPath("coordinate_file_path", build_coordinates.id)
assign_parameters.substance = ProtocolPath("substance", "global")

# Step 4: Energy minimization
energy_minimization = OpenMMEnergyMinimisation("energy_minimisation")
energy_minimization.allow_merging = True
energy_minimization.input_coordinate_file = ProtocolPath("coordinate_file_path", build_coordinates.id)
energy_minimization.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)
energy_minimization.tolerance = 10.0 * unit.kilojoules_per_mole / unit.nanometer
energy_minimization.enable_pbc = True
energy_minimization.max_iterations = 0

# Step 5: Equilibration simulation
equilibration_simulation = OpenMMSimulation("equilibration_simulation")
equilibration_simulation.allow_merging = True
equilibration_simulation.steps_per_iteration = 100000
equilibration_simulation.total_number_of_iterations = 1
equilibration_simulation.output_frequency = 5000
equilibration_simulation.checkpoint_frequency = 10
equilibration_simulation.timestep = 2.0 * unit.femtosecond
equilibration_simulation.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
equilibration_simulation.thermostat_friction = 1.0 / unit.picosecond
equilibration_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", energy_minimization.id)
equilibration_simulation.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)
equilibration_simulation.enable_pbc = True
equilibration_simulation.allow_gpu_platforms = True
equilibration_simulation.high_precision = False


# Step 6: Analysis step to check if density fluctuation has converged
density_analysis = AverageObservable("density_analysis")
density_analysis.observable = ProtocolPath("observables.Density", equilibration_simulation.id)
density_analysis.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
density_analysis.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
density_analysis.input_trajectory_path = ProtocolPath("trajectory_file_path", equilibration_simulation.id)
# Remove this line:
# density_analysis.output_observable = "density_fluctuation"

# Step 7: Define a conditional group for iterative equilibration simulation and analysis
conditional_group = ConditionalGroup("conditional_group")
conditional_group.add_protocols(equilibration_simulation, density_analysis)

# Set up a condition which will check if the density fluctuation is less than some threshold.
condition = ConditionalGroup.Condition()
condition.condition_type = ConditionalGroup.Condition.Type.LessThan
condition.right_hand_value = 0.1 * unit.grams / unit.milliliter  # Threshold for fluctuation
# Change this line:
condition.left_hand_value = ProtocolPath("value.standard_deviation", density_analysis.id)

# Add the condition.
conditional_group.add_condition(condition)

# Step 8: Final production simulation after density fluctuation has converged
final_production = OpenMMSimulation("final_production_simulation")
final_production.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
final_production.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
final_production.steps = 50000  # 0.1 ns run for testing
final_production.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
final_production.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)

# Step 9: Define schema and metadata
schema = WorkflowSchema()
schema.protocol_schemas = [
    build_coordinates.schema,
    assign_parameters.schema,
    energy_minimization.schema,
    conditional_group.schema,
    final_production.schema
]

metadata = {
    "substance": substance,
    "thermodynamic_state": ThermodynamicState(
        temperature=298.15 * unit.kelvin,
        pressure=1.0 * unit.atmosphere
    ),
    "force_field_path": "openff_unconstrained-2.1.0.offxml"
}

# Step 10: Create the workflow from schema
workflow = Workflow.from_schema(schema, metadata=metadata)

# Step 11: Execute the workflow
# workflow.execute()


ValueError: The conditional_group protocol tries to take input from a non-existent protocol: density_analysis.value.standard_deviation

In [110]:
from openff.evaluator.workflow import Protocol
from openff.evaluator.workflow.attributes import (
    InputAttribute,
    OutputAttribute,
)
from openff.evaluator.utils.observables import Observable
from openff.evaluator.attributes import UNDEFINED
class ComputeStandardDeviation(Protocol):
    observable = InputAttribute(
        docstring="The observable from which to compute the standard deviation.",
        type_hint=Observable,
        default_value=UNDEFINED,
    )
    standard_deviation = OutputAttribute(
        docstring="The computed standard deviation.",
        type_hint=float,
    )

    def _execute(self, directory, available_resources):
        import numpy as np

        values = self.observable.value
        self.standard_deviation = np.std(values)
from openff.evaluator.workflow import register_workflow_protocol
register_workflow_protocol(ComputeStandardDeviation)

In [120]:
from openff.evaluator.workflow import Protocol
from openff.evaluator.workflow.attributes import (
    InputAttribute,
    OutputAttribute,
)
from openff.evaluator.utils.observables import Observable
from openff.evaluator.attributes import UNDEFINED
import numpy as np
class ComputeStandardDeviation(Protocol):
    observable = InputAttribute(
        docstring="The observable from which to compute the standard deviation.",
        type_hint=Observable,
        default_value=UNDEFINED,
    )
    standard_deviation = OutputAttribute(
        docstring="The computed standard deviation.",
        type_hint=float,
    )

    def _execute(self, directory, available_resources):
        values = self.observable.value
        self.standard_deviation = np.std(values)
from openff.evaluator.workflow import register_workflow_protocol
try:
    register_workflow_protocol(ComputeStandardDeviation)
except ValueError:
    pass
# Step 1: Convert SMILES to a molecule and add hydrogens
benzene_smiles = "c1ccccc1"
benzene_mol = Chem.AddHs(Chem.MolFromSmiles(benzene_smiles))
rdDistGeom.EmbedMolecule(benzene_mol)

# Convert molecule to SMILES again (to ensure it's correctly hydrogenated)
smiles = Chem.MolToSmiles(benzene_mol)
substance = Substance()
substance.add_component(Component(smiles=smiles), MoleFraction(1.0))

# Step 2: Build coordinates
build_coordinates = BuildCoordinatesPackmol("build_coordinates")
build_coordinates.allow_merging = True
build_coordinates.count_exact_amount = True
build_coordinates.mass_density = 0.95 * unit.grams / unit.milliliters
build_coordinates.box_aspect_ratio = [1.0, 1.0, 1.0]
build_coordinates.substance = ProtocolPath("substance", "global")
build_coordinates.tolerance = 2.0 * unit.angstrom
build_coordinates.verbose_packmol = False
build_coordinates.retain_packmol_files = False

# Step 3: Assign parameters
assign_parameters = BuildSmirnoffSystem("assign_parameters")
assign_parameters.allow_merging = True
assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
assign_parameters.coordinate_file_path = ProtocolPath("coordinate_file_path", build_coordinates.id)
assign_parameters.substance = ProtocolPath("substance", "global")

# Step 4: Energy minimization
energy_minimization = OpenMMEnergyMinimisation("energy_minimisation")
energy_minimization.allow_merging = True
energy_minimization.input_coordinate_file = ProtocolPath("coordinate_file_path", build_coordinates.id)
energy_minimization.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)
energy_minimization.tolerance = 10.0 * unit.kilojoules_per_mole / unit.nanometer
energy_minimization.enable_pbc = True
energy_minimization.max_iterations = 0

# Step 5: Equilibration simulation
equilibration_simulation = OpenMMSimulation("equilibration_simulation")
equilibration_simulation.allow_merging = True
equilibration_simulation.steps_per_iteration = 100000
equilibration_simulation.total_number_of_iterations = 1
equilibration_simulation.output_frequency = 5000
equilibration_simulation.checkpoint_frequency = 10
equilibration_simulation.timestep = 2.0 * unit.femtosecond
equilibration_simulation.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
equilibration_simulation.thermostat_friction = 1.0 / unit.picosecond
equilibration_simulation.input_coordinate_file = ProtocolPath("output_coordinate_file", energy_minimization.id)
equilibration_simulation.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)
equilibration_simulation.enable_pbc = True
equilibration_simulation.allow_gpu_platforms = True
equilibration_simulation.high_precision = False

# Step 6: Analysis step to check if density fluctuation has converged
density_analysis = AverageObservable("density_analysis")
density_analysis.observable = ProtocolPath("observables.Density", equilibration_simulation.id)
density_analysis.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
density_analysis.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
density_analysis.input_trajectory_path = ProtocolPath("trajectory_file_path", equilibration_simulation.id)

std_dev_analysis = ComputeStandardDeviation("std_dev_analysis")
std_dev_analysis.observable = ProtocolPath("value", density_analysis.id)

# Step 7: Define a conditional group for iterative equilibration simulation and analysis
conditional_group = ConditionalGroup("conditional_group")
conditional_group.add_protocols(equilibration_simulation, density_analysis, std_dev_analysis)

# Set up a condition which will check if the density fluctuation is less than some threshold.
condition = ConditionalGroup.Condition()
condition.condition_type = ConditionalGroup.Condition.Type.LessThan
condition.right_hand_value = 0.1 * unit.grams / unit.milliliter  # Threshold for fluctuation
condition.left_hand_value = ProtocolPath("standard_deviation", std_dev_analysis.id)

# Add the condition.
conditional_group.add_condition(condition)

# Step 8: Final production simulation after density fluctuation has converged
final_production = OpenMMSimulation("final_production_simulation")
final_production.input_coordinate_file = ProtocolPath("output_coordinate_file", equilibration_simulation.id)
final_production.input_force_field = ProtocolPath("parameterized_system", assign_parameters.id)
final_production.steps = 50000  # 0.1 ns run for testing
final_production.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
final_production.parameterized_system = ProtocolPath("parameterized_system", assign_parameters.id)


# Step 9: Define schema and metadata
schema = WorkflowSchema()
schema.protocol_schemas = [
    build_coordinates.schema,
    assign_parameters.schema,
    energy_minimization.schema,
    conditional_group.schema,
    final_production.schema
]

metadata = {
    "substance": substance,
    "thermodynamic_state": ThermodynamicState(
        temperature=298.15 * unit.kelvin,
        pressure=1.0 * unit.atmosphere
    ),
    "force_field_path": "openff_unconstrained-2.1.0.offxml"
}

# Step 10: Create the workflow from schema
workflow = Workflow.from_schema(schema, metadata=metadata)

# Step 11: Execute the workflow
workflow.execute()


ValueError: The conditional_group protocol tries to take input from a non-existent protocol: std_dev_analysis.standard_deviation

In [127]:
import pandas as pd
df_equilib = pd.read_csv("/localhome/cschiebroek/MDFP_VP/mdfptools/equilibration_test.dat", delimiter = ",")
df_equilib

Unnamed: 0,"#""Step""",Temperature (K),Box Volume (nm^3),Density (g/mL)
0,10,4.616467,15.625000,0.954134
1,20,6.553867,15.625000,0.954134
2,30,9.144245,15.478026,0.963194
3,40,9.147018,15.478026,0.963194
4,50,12.826166,15.478026,0.963194
...,...,...,...,...
4995,49960,312.063079,14.873858,1.002319
4996,49970,313.989386,14.873858,1.002319
4997,49980,322.931182,14.963079,0.996342
4998,49990,305.587733,14.963079,0.996342


In [None]:
from openff.evaluator.workflow import Protocol
from openff.evaluator.workflow.attributes import (
    InputAttribute,
    OutputAttribute,
)
from openff.evaluator.utils.observables import Observable
from openff.evaluator.attributes import UNDEFINED
import numpy as np
class ComputeStandardDeviation(Protocol):
    observable = InputAttribute(
        docstring="The observable from which to compute the standard deviation.",
        type_hint=Observable,
        default_value=UNDEFINED,
    )
    standard_deviation = OutputAttribute(
        docstring="The computed standard deviation.",
        type_hint=float,
    )

    def _execute(self, directory, available_resources):
        values = self.observable.value
        self.standard_deviation = np.std(values)
from openff.evaluator.workflow import register_workflow_protocol
try:
    register_workflow_protocol(ComputeStandardDeviation)
except ValueError:
    pass
# Step 1: Convert SMILES to a molecule and add hydrogens
benzene_smiles = "c1ccccc1"
benzene_mol = Chem.AddHs(Chem.MolFromSmiles(benzene_smiles))
rdDistGeom.EmbedMolecule(benzene_mol)

# Convert molecule to SMILES again (to ensure it's correctly hydrogenated)
smiles = Chem.MolToSmiles(benzene_mol)
substance = Substance()
substance.add_component(Component(smiles=smiles), MoleFraction(1.0))

# Step 2: Build coordinates
build_coordinates = BuildCoordinatesPackmol("build_coordinates")
build_coordinates.allow_merging = True
build_coordinates.count_exact_amount = True
build_coordinates.mass_density = 0.95 * unit.grams / unit.milliliters
build_coordinates.box_aspect_ratio = [1.0, 1.0, 1.0]
build_coordinates.substance = ProtocolPath("substance", "global")
build_coordinates.tolerance = 2.0 * unit.angstrom
build_coordinates.verbose_packmol = False
build_coordinates.retain_packmol_files = False

# Step 3: Assign parameters
assign_parameters = BuildSmirnoffSystem("assign_parameters")
assign_parameters.allow_merging = True
assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
assign_parameters.coordinate_file_path = ProtocolPath("coordinate_file_path", build_coordinates.id)
assign_parameters.substance = ProtocolPath("substance", "global")


# Step 4: Define schema and metadata
schema = WorkflowSchema()
schema.protocol_schemas = [
    build_coordinates.schema,
    assign_parameters.schema,
]

metadata = {
    "substance": substance,
    "thermodynamic_state": ThermodynamicState(
        temperature=298.15 * unit.kelvin,
        pressure=1.0 * unit.atmosphere
    ),
    "force_field_path": "openff_unconstrained-2.1.0.offxml"
}

# Step 10: Create the workflow from schema
workflow = Workflow.from_schema(schema, metadata=metadata)

# Step 11: Execute the workflow
workflow.execute()


## Route 2: only packing and parm

In [24]:
from openff.toolkit import Molecule
import parmed
from openff.evaluator import unit
from openff.evaluator.substances import Substance,
def parameterize_system(smiles, force_field_path="openff_unconstrained-2.1.0.offxml", mass_density=0.95 * unit.grams / unit.milliliters):
    # Define the substance
    molecule = Molecule.from_smiles(smiles)
    substance = Substance()
    substance.add_component(Component(smiles=smiles), MoleFraction(1.0))

    # Create the workflow schema
    schema = WorkflowSchema()

    # Step 1: Build coordinates
    build_coordinates = BuildCoordinatesPackmol("build_coordinates")
    build_coordinates.max_molecules = 1000
    build_coordinates.mass_density = mass_density
    build_coordinates.substance = substance
    schema.protocol_schemas.append(build_coordinates.schema)

    # Step 2: Assign parameters
    assign_parameters = BuildSmirnoffSystem("assign_parameters")
    assign_parameters.force_field_path = force_field_path
    assign_parameters.coordinate_file_path = ProtocolPath("coordinate_file_path", build_coordinates.id)
    assign_parameters.substance = substance
    schema.protocol_schemas.append(assign_parameters.schema)

    # Metadata
    metadata = {
        "substance": substance,
        "thermodynamic_state": ThermodynamicState(
            temperature=298.15 * unit.kelvin,
            pressure=1.0 * unit.atmosphere
        ),
        "force_field_path": force_field_path
    }

    # Create and execute the workflow
    workflow = Workflow.from_schema(schema, metadata=metadata)
    workflow.execute()

    # Extract the parameterized system
    parameterized_system = workflow.output_data["assign_parameters.parameterized_system"]
    
    # Convert to parmed object
    pdb_file = workflow.output_data["build_coordinates.coordinate_file_path"]
    omm_top = app.PDBFile(pdb_file).topology
    parmed_obj = parmed.openmm.load_topology(omm_top, parameterized_system)

    return parmed_obj

# Example usage
smiles = "c1ccccc1"  # Benzene SMILES
parmed_obj = parameterize_system(smiles)


NameError: name 'Substance' is not defined

In [None]:
from Simulator import SolutionSimulator

print('Topology saved, simulating...')
traj_path = f"./"
confid = 'test'
SolutionSimulator.via_openmm(parmed_obj, file_name = confid, file_path = traj_path,
                             platform = "CUDA", num_steps = 5000 * 500)

In [None]:
register_calculation_schema(
    property_class=Density,
    layer_class=SimulationLayer,
    schema=Density.default_simulation_schema
)


In [1]:
from openff.evaluator.datasets.thermoml import ThermoMLDataSet


****** PyMBAR will use 64-bit JAX! *******
* JAX is currently set to 32-bit bitsize *
* which is its default.                  *
*                                        *
* PyMBAR requires 64-bit mode and WILL   *
* enable JAX's 64-bit mode when called.  *
*                                        *
* This MAY cause problems with other     *
* Uses of JAX in the same code.          *
******************************************



In [2]:
data_set = ThermoMLDataSet.from_doi(
    "10.1016",
    "10.1007",
    "10.1021",  
)

No ThermoML file could not be found at https://trc.nist.gov/ThermoML/10.1016.xml
No ThermoML file could not be found at https://trc.nist.gov/ThermoML/10.1007.xml
No ThermoML file could not be found at https://trc.nist.gov/ThermoML/10.1021.xml


In [3]:
data_set

In [3]:
import os
import xml.etree.ElementTree as ET
import pandas as pd
from tqdm import tqdm
# Define the directory containing the ThermoML XML files
dirs = ["./ThermoML.v2020-09-30/10.1007", "./ThermoML.v2020-09-30/10.1016", "./ThermoML.v2020-09-30/10.1021"]
dir_name = dirs[0]
# Initialize a list to store the data
data = []
# Iterate through each XML file in the directory
for filename in tqdm(os.listdir(dir_name)):
    if filename.endswith(".xml"):
        filepath = os.path.join(dir_name, filename)
        
        # Parse the XML file
        tree = ET.parse(filepath)
        root = tree.getroot()
        
        # Extract relevant data (this is an example, adjust as needed)
        for substance in root.findall('Substance'):
            substance_id = substance.get('ID')
            for property in substance.findall('Property'):
                property_id = property.get('ID')
                for value in property.findall('Value'):
                    value_text = value.text
                    
                    # Append the extracted data to the list
                    data.append([substance_id, property_id, value_text])

# Convert the data to a pandas DataFrame
columns = ['SubstanceID', 'PropertyID', 'Value']
df = pd.DataFrame(data, columns=columns)


100%|██████████| 394/394 [00:01<00:00, 343.87it/s]


In [7]:
root.findall('Substance')

[]

In [4]:
df

Unnamed: 0,SubstanceID,PropertyID,Value


In [12]:
data_set = ThermoMLDataSet.from_doi(
    "10.1016/j.fluid.2013.10.034",
    "10.1021/je1013476",
)
data_set

<PhysicalPropertyDataSet n_properties=275 n_substances=254 n_sources=2>

In [13]:
from openff.evaluator.properties import Density, EnthalpyOfVaporization

density_schema = Density.default_simulation_schema(n_molecules=256)

density_schema

<openff.evaluator.layers.simulation.SimulationSchema at 0x7b3df8657990>

In [9]:
import pandas as pd
from openff.evaluator.datasets.curation.components.thermoml import (
    ImportThermoMLData,
    ImportThermoMLDataSchema,
)

# Import all data collected from the IJT journal.
data_frame = ImportThermoMLData.apply(pd.DataFrame(), ImportThermoMLDataSchema())
data_frame

  self.pid = os.fork()
An exception was raised when processing 10.1021/je049564z.xml. This file will be skipped.
Traceback (most recent call last):
  File "/localhome/cschiebroek/MDFP_VP/mdfptools/carl/openff-evaluator/openff/evaluator/datasets/curation/components/thermoml.py", line 73, in _process_archive
    data_set = ThermoMLDataSet.from_file(file_path)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/localhome/cschiebroek/MDFP_VP/mdfptools/carl/openff-evaluator/openff/evaluator/datasets/thermoml/thermoml.py", line 2089, in from_file
    data_set = cls._from_file(file)
               ^^^^^^^^^^^^^^^^^^^^
  File "/localhome/cschiebroek/MDFP_VP/mdfptools/carl/openff-evaluator/openff/evaluator/datasets/thermoml/thermoml.py", line 2122, in _from_file
    return_value = ThermoMLDataSet.from_xml(file.read(), source)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/localhome/cschiebroek/MDFP_VP/mdfptools/carl/openff-evaluator/openff/evaluator/datasets/

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Exact Amount 1,Component 2,Role 2,Mole Fraction 2,Exact Amount 2,Density Value (g / ml),Density Uncertainty (g / ml),Source,Component 3,Role 3,Mole Fraction 3,Exact Amount 3,EnthalpyOfMixing Value (kJ / mol),EnthalpyOfMixing Uncertainty (kJ / mol),EnthalpyOfVaporization Value (kJ / mol),EnthalpyOfVaporization Uncertainty (kJ / mol),DielectricConstant Value (),DielectricConstant Uncertainty (),ExcessMolarVolume Value (cm ** 3 / mol),ExcessMolarVolume Uncertainty (cm ** 3 / mol)
0,08c501e79447409485103c84059f2e58,283.15,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0000,,,,,,0.73700,0.000150,10.1021/je800982n,,,,,,,,,,,,
1,f371febceeae4b00a16c2f288a513770,286.15,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0000,,,,,,0.73400,0.000150,10.1021/je800982n,,,,,,,,,,,,
2,2825fd3dc02e452e8576fc1fa43dd547,288.15,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0000,,,,,,0.73180,0.000150,10.1021/je800982n,,,,,,,,,,,,
3,03d3e2b82d1c4c08bcabc13d1ba3bde4,290.15,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0000,,,,,,0.72980,0.000150,10.1021/je800982n,,,,,,,,,,,,
4,9bc2a9ff1a2146b4b57425151b9b3073,291.15,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0000,,,,,,0.72870,0.000150,10.1021/je800982n,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
646799,7bac657cadb04421a186faf08a0b485d,303.15,101.0,Liquid,2,COc1ccccc1,Solvent,0.6099,,CCCCCCCCCCCC,Solvent,0.3901,,0.84196,0.000320,10.1016/j.jct.2004.09.021,,,,,,,,,,,,
646800,1687efce4c29448f9271520b073899fd,303.15,101.0,Liquid,2,COc1ccccc1,Solvent,0.7020,,CCCCCCCCCCCC,Solvent,0.2980,,0.86678,0.000310,10.1016/j.jct.2004.09.021,,,,,,,,,,,,
646801,4c9e7cbb6a8641318c6caf1a8094c8f3,303.15,101.0,Liquid,2,COc1ccccc1,Solvent,0.8023,,CCCCCCCCCCCC,Solvent,0.1977,,0.89886,0.000295,10.1016/j.jct.2004.09.021,,,,,,,,,,,,
646802,6612d9eb0df747f8bd4a3433a3feca00,303.15,101.0,Liquid,2,COc1ccccc1,Solvent,0.8976,,CCCCCCCCCCCC,Solvent,0.1024,,0.93551,0.000280,10.1016/j.jct.2004.09.021,,,,,,,,,,,,


In [11]:
from openff.evaluator.datasets.curation.components.filtering import (
    FilterDuplicates,
    FilterDuplicatesSchema,
)
print(f'pre-filtering: {len(data_frame)}')
# filtered_frame = FilterDuplicates.apply(data_frame, FilterDuplicatesSchema())
# print(f'post duplicate filtering: {len(filtered_frame)}')

from openff.evaluator.datasets.curation.components.filtering import (
    FilterByTemperature,
    FilterByTemperatureSchema,
)

filtered_frame = FilterByTemperature.apply(
    data_frame,
    FilterByTemperatureSchema(minimum_temperature=290.0, maximum_temperature=320.0),
)
print(f'post temperature filtering: {len(filtered_frame)}')

from openff.evaluator.datasets.curation.components.filtering import (
    FilterByPressure,
    FilterByPressureSchema,
)

filtered_frame = FilterByPressure.apply(
    filtered_frame,
    FilterByPressureSchema(minimum_pressure=100.0, maximum_pressure=140.0),
)
print(f'post pressure filtering: {len(filtered_frame)}')

from openff.evaluator.datasets.curation.components.filtering import (
    FilterByPropertyTypes,
    FilterByPropertyTypesSchema,
)

# Retain only density measurements made for puresystems.
filtered_frame = FilterByPropertyTypes.apply(
    filtered_frame,
    FilterByPropertyTypesSchema(
        property_types=["Density"],
        n_components={"Density": [1]},
    ),
)
print(f'post property type filtering: {len(filtered_frame)}')

from openff.evaluator.datasets.curation.components.filtering import (
    FilterByNComponents,
    FilterByNComponentsSchema,
)

filtered_frame = FilterByNComponents.apply(
    filtered_frame, FilterByNComponentsSchema(n_components=[1])
)
print(f'post n-components filtering: {len(filtered_frame)}')

pre-filtering: 646804
post temperature filtering: 393920
post pressure filtering: 238149
post property type filtering: 76797
post n-components filtering: 76797


In [12]:
filtered_frame

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Density Value (g / ml),Density Uncertainty (g / ml),Source
3,03d3e2b82d1c4c08bcabc13d1ba3bde4,290.15,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0,0.72980,0.000150,10.1021/je800982n
4,9bc2a9ff1a2146b4b57425151b9b3073,291.15,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0,0.72870,0.000150,10.1021/je800982n
5,c20d201242384daea56a2fdd57b2b190,291.35,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0,0.72850,0.000150,10.1021/je800982n
39,5f489caf2b3148aea05e19f43778c0ee,290.15,101.0,Liquid,1,O,Solvent,1.0,0.99870,0.000100,10.1021/je800982n
49,107ad223e4d046b187eb2daf9225d728,290.15,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0,0.72980,0.000150,10.1021/je800982n
...,...,...,...,...,...,...,...,...,...,...,...
646781,956da7e6530843b2a5b6dab1768f4447,293.15,101.0,Liquid,1,COc1ccccc1,Solvent,1.0,0.99381,0.000265,10.1016/j.jct.2004.09.021
646782,5d6a46fc149a4428b6145dbbd25a219b,298.15,101.0,Liquid,1,CCCCCCCCCCCC,Solvent,1.0,0.74514,0.000385,10.1016/j.jct.2004.09.021
646792,58303b2fb3ee44d8aae2782aa7f4b028,298.15,101.0,Liquid,1,COc1ccccc1,Solvent,1.0,0.98915,0.000265,10.1016/j.jct.2004.09.021
646793,3bea8503ba3a42118d8abab96f6a378e,303.15,101.0,Liquid,1,CCCCCCCCCCCC,Solvent,1.0,0.74152,0.000390,10.1016/j.jct.2004.09.021


In [18]:
#make mols, do ChargeParent standardiziation and add inchikey
from rdkit import Chem
from rdkit.Chem.MolStandardize.rdMolStandardize import ChargeParent 
filtered_frame['InChiKey'] = filtered_frame['Component 1'].apply(lambda x: Chem.MolToInchiKey(ChargeParent(Chem.MolFromSmiles(x))))
filtered_frame

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Density Value (g / ml),Density Uncertainty (g / ml),Source,InChiKey
3,03d3e2b82d1c4c08bcabc13d1ba3bde4,290.15,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0,0.72980,0.000150,10.1021/je800982n,ZMANZCXQSJIPKH-UHFFFAOYSA-N
4,9bc2a9ff1a2146b4b57425151b9b3073,291.15,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0,0.72870,0.000150,10.1021/je800982n,ZMANZCXQSJIPKH-UHFFFAOYSA-N
5,c20d201242384daea56a2fdd57b2b190,291.35,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0,0.72850,0.000150,10.1021/je800982n,ZMANZCXQSJIPKH-UHFFFAOYSA-N
39,5f489caf2b3148aea05e19f43778c0ee,290.15,101.0,Liquid,1,O,Solvent,1.0,0.99870,0.000100,10.1021/je800982n,XLYOFNOQVPJJNP-UHFFFAOYSA-N
49,107ad223e4d046b187eb2daf9225d728,290.15,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0,0.72980,0.000150,10.1021/je800982n,ZMANZCXQSJIPKH-UHFFFAOYSA-N
...,...,...,...,...,...,...,...,...,...,...,...,...
646781,956da7e6530843b2a5b6dab1768f4447,293.15,101.0,Liquid,1,COc1ccccc1,Solvent,1.0,0.99381,0.000265,10.1016/j.jct.2004.09.021,RDOXTESZEPMUJZ-UHFFFAOYSA-N
646782,5d6a46fc149a4428b6145dbbd25a219b,298.15,101.0,Liquid,1,CCCCCCCCCCCC,Solvent,1.0,0.74514,0.000385,10.1016/j.jct.2004.09.021,SNRUBQQJIBEYMU-UHFFFAOYSA-N
646792,58303b2fb3ee44d8aae2782aa7f4b028,298.15,101.0,Liquid,1,COc1ccccc1,Solvent,1.0,0.98915,0.000265,10.1016/j.jct.2004.09.021,RDOXTESZEPMUJZ-UHFFFAOYSA-N
646793,3bea8503ba3a42118d8abab96f6a378e,303.15,101.0,Liquid,1,CCCCCCCCCCCC,Solvent,1.0,0.74152,0.000390,10.1016/j.jct.2004.09.021,SNRUBQQJIBEYMU-UHFFFAOYSA-N


In [20]:
filtered_frame

Unnamed: 0,Id,Temperature (K),Pressure (kPa),Phase,N Components,Component 1,Role 1,Mole Fraction 1,Density Value (g / ml),Density Uncertainty (g / ml),Source,InChiKey
3,03d3e2b82d1c4c08bcabc13d1ba3bde4,290.15,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0,0.72980,0.000150,10.1021/je800982n,ZMANZCXQSJIPKH-UHFFFAOYSA-N
4,9bc2a9ff1a2146b4b57425151b9b3073,291.15,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0,0.72870,0.000150,10.1021/je800982n,ZMANZCXQSJIPKH-UHFFFAOYSA-N
5,c20d201242384daea56a2fdd57b2b190,291.35,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0,0.72850,0.000150,10.1021/je800982n,ZMANZCXQSJIPKH-UHFFFAOYSA-N
39,5f489caf2b3148aea05e19f43778c0ee,290.15,101.0,Liquid,1,O,Solvent,1.0,0.99870,0.000100,10.1021/je800982n,XLYOFNOQVPJJNP-UHFFFAOYSA-N
49,107ad223e4d046b187eb2daf9225d728,290.15,101.0,Liquid,1,CCN(CC)CC,Solvent,1.0,0.72980,0.000150,10.1021/je800982n,ZMANZCXQSJIPKH-UHFFFAOYSA-N
...,...,...,...,...,...,...,...,...,...,...,...,...
646781,956da7e6530843b2a5b6dab1768f4447,293.15,101.0,Liquid,1,COc1ccccc1,Solvent,1.0,0.99381,0.000265,10.1016/j.jct.2004.09.021,RDOXTESZEPMUJZ-UHFFFAOYSA-N
646782,5d6a46fc149a4428b6145dbbd25a219b,298.15,101.0,Liquid,1,CCCCCCCCCCCC,Solvent,1.0,0.74514,0.000385,10.1016/j.jct.2004.09.021,SNRUBQQJIBEYMU-UHFFFAOYSA-N
646792,58303b2fb3ee44d8aae2782aa7f4b028,298.15,101.0,Liquid,1,COc1ccccc1,Solvent,1.0,0.98915,0.000265,10.1016/j.jct.2004.09.021,RDOXTESZEPMUJZ-UHFFFAOYSA-N
646793,3bea8503ba3a42118d8abab96f6a378e,303.15,101.0,Liquid,1,CCCCCCCCCCCC,Solvent,1.0,0.74152,0.000390,10.1016/j.jct.2004.09.021,SNRUBQQJIBEYMU-UHFFFAOYSA-N


In [13]:
#save this one
filtered_frame.to_csv("filtered_frame_densities_ThermoMLData.csv")
#and the raw one
data_frame.to_csv("raw_frame_densities_ThermoMLData.csv")

In [16]:
#lets check for these in the db
df_confs = pd.read_csv('/localhome/cschiebroek/MDFP_VP/mdfptools/carl/data_curation/cs_mdfps_schema_experimental_data.csv')
df_confs
smiles_experimental_vps = df_confs['smiles'].tolist()
smiles_liquid_densities = filtered_frame['Component 1'].tolist()
#check overlap
missing_densities = [s for s in smiles_experimental_vps if s not in smiles_liquid_densities]
print(len(missing_densities),len(smiles_experimental_vps),len(smiles_liquid_densities))

2617 3050 76797


In [21]:
df_confs['InChiKey'] = df_confs['smiles'].apply(lambda x: Chem.MolToInchiKey(ChargeParent(Chem.MolFromSmiles(x))))
df_confs

Unnamed: 0,molregno,conf_id,vp_log10_pa,mdfp,molblock,md_experiment_uuid,confgen_uuid,ROMol,smiles,InChiKey
0,2390,2388,2.325202,"{'mdfp': '[10, 1, 0, 0, 0, 0, 0, 0, 0, 0, -24....",[H]C([H])=C1C([H])=C([H])[C@]([H])(C([H])(C([H...,fc57851e-b654-4338-bcdd-faa28ec66253,906589dd-76fa-4d7b-aa9f-1ee90abe3835,<rdkit.Chem.rdchem.Mol object at 0x7732ef1e1700>,C=C1C=C[C@H](C(C)C)CC1,LFJQCDVYDGGFCH-JTQLQIEISA-N
1,2396,2394,2.092402,"{'mdfp': '[7, 4, 0, 1, 0, 0, 0, 0, 0, 0, -4.74...",[H]OC([H])([H])C([H])([H])C([H])([H])C([H])([H...,fc57851e-b654-4338-bcdd-faa28ec66253,906589dd-76fa-4d7b-aa9f-1ee90abe3835,<rdkit.Chem.rdchem.Mol object at 0x7732ef1e18c0>,CCCCCCO,ZSIAUFGUXNUGDI-UHFFFAOYSA-N
2,2399,2397,-3.979000,"{'mdfp': '[19, 1, 0, 0, 0, 0, 0, 7, 0, 0, -18....",[H]c1c(Cl)c(Cl)c(Cl)c(-c2c([H])c(Cl)c(Cl)c(Cl)...,fc57851e-b654-4338-bcdd-faa28ec66253,906589dd-76fa-4d7b-aa9f-1ee90abe3835,<rdkit.Chem.rdchem.Mol object at 0x7732ef1e1930>,Clc1cc(-c2c(Cl)cc(Cl)c(Cl)c2Cl)cc(Cl)c1Cl,TVFXBXWAXIMLAQ-UHFFFAOYSA-N
3,2422,2420,3.031002,"{'mdfp': '[7, 0, 2, 0, 0, 0, 0, 0, 0, 0, 226.2...",[H]c1nc([H])c(C([H])([H])[H])nc1[H]\n RDKi...,fc57851e-b654-4338-bcdd-faa28ec66253,906589dd-76fa-4d7b-aa9f-1ee90abe3835,<rdkit.Chem.rdchem.Mol object at 0x7732ef1e19a0>,Cc1cnccn1,CAWHJQAVHZEVTJ-UHFFFAOYSA-N
4,2400,2398,2.056402,"{'mdfp': '[10, 1, 0, 0, 0, 0, 0, 0, 0, 0, -30....",[H]c1c([H])c(C([H])([H])C([H])([H])[H])c(C([H]...,fc57851e-b654-4338-bcdd-faa28ec66253,906589dd-76fa-4d7b-aa9f-1ee90abe3835,<rdkit.Chem.rdchem.Mol object at 0x7732ef1e1a10>,CCc1ccc(C)cc1C,MEMBJMDZWKVOTB-UHFFFAOYSA-N
...,...,...,...,...,...,...,...,...,...,...
3045,2690,2688,3.725002,"{'mdfp': '[7, 1, 0, 0, 0, 0, 0, 0, 0, 0, -2.05...",[H]C([H])([H])C([H])([H])C1([H])C([H])([H])C([...,fc57851e-b654-4338-bcdd-faa28ec66253,906589dd-76fa-4d7b-aa9f-1ee90abe3835,<rdkit.Chem.rdchem.Mol object at 0x7732ef286810>,CCC1CCCC1,IFTRQJLVEBNKJK-UHFFFAOYSA-N
3046,2758,2756,2.352403,"{'mdfp': '[9, 0, 0, 0, 0, 0, 0, 0, 0, 0, -25.6...",[H]c1c([H])c(C([H])([H])[H])c(C([H])([H])[H])c...,fc57851e-b654-4338-bcdd-faa28ec66253,906589dd-76fa-4d7b-aa9f-1ee90abe3835,<rdkit.Chem.rdchem.Mol object at 0x7732ef286880>,Cc1cccc(C)c1C,FYGHSUNMUKGBRK-UHFFFAOYSA-N
3047,2801,2799,3.181803,"{'mdfp': '[6, 0, 0, 0, 0, 0, 1, 0, 0, 0, -5.63...",[H]SC1([H])C([H])([H])C([H])([H])C([H])([H])C1...,fc57851e-b654-4338-bcdd-faa28ec66253,906589dd-76fa-4d7b-aa9f-1ee90abe3835,<rdkit.Chem.rdchem.Mol object at 0x7732ef2868f0>,SC1CCCC1,WVDYBOADDMMFIY-UHFFFAOYSA-N
3048,2829,2827,-3.416498,"{'mdfp': '[9, 4, 0, 4, 0, 0, 0, 0, 0, 0, -320....",[H]OC(=O)C([H])([H])C([H])([H])C([H])([H])C(=O...,fc57851e-b654-4338-bcdd-faa28ec66253,906589dd-76fa-4d7b-aa9f-1ee90abe3835,<rdkit.Chem.rdchem.Mol object at 0x7732ef286960>,O=C(O)CCCC(=O)O,JFCQEDHGNNZCLN-UHFFFAOYSA-N


In [22]:
inchikeys_with_vp  = df_confs['InChiKey'].tolist()
inchikeys_with_density = filtered_frame['InChiKey'].tolist()
missing_densities = [s for s in inchikeys_with_vp if s not in inchikeys_with_density]
print(len(missing_densities),len(inchikeys_with_vp),len(inchikeys_with_density))

2608 3050 76797


In [None]:
# generate_simulation_protocols()