In [1]:
import ipsuite as ips

2023-07-24 20:33:51,131 (DEBUG): Welcome to IPS - the Interatomic Potential Suite!


The DFT setup is based on https://www.sciencedirect.com/science/article/pii/S1386142521004455#b0030

In [2]:
# Langevin thermostat that is handed to every ASEMD run in the workflow.
# NOTE(review): units are not visible in this notebook -- presumably K for
# `temperature` and fs for `time_step`; confirm against the ipsuite docs.
thermostat = ips.calculators.LangevinThermostat(
    temperature=298.15,
    friction=0.01,
    time_step=0.5,
)

# Threshold check on the "energy_uncertainty" value; it is passed to ASEMD
# through `checker_list`.
# NOTE(review): presumably this stops a run once the uncertainty exceeds
# `max_value` -- confirm the exact semantics of `larger_only`.
uncertainty_check = ips.analysis.ThresholdCheck(
    value="energy_uncertainty",
    max_value=2.0,
    larger_only=True,
)

# CP2K inputs shared by every CP2KSinglePoint node in this workflow.
# Each node receives its own list built from this tuple.
CP2K_FILES = ("BASIS_MOLOPT", "GTH_POTENTIALS", "dftd3.dat")
CP2K_SHELL = "cp2k_shell.ssmp"

with ips.Project(automatic_node_names=True) as project:
    # --- Initial structure ------------------------------------------------
    # Single molecule from its SMILES string (CS(=O)C is dimethyl sulfoxide).
    mol = ips.configuration_generation.SmilesToAtoms(smiles="CS(=O)C")

    # Pack 38 copies of the molecule into a box.
    packmol = ips.configuration_generation.Packmol(
        data=[mol.atoms],
        count=[38],
        density=1095.2,  # presumably kg/m^3 -- TODO confirm
    )

    # --- Reference (CP2K) calculator and geometry optimisation ------------
    cp2k = ips.calculators.CP2KSinglePoint(
        data=packmol.atoms,
        cp2k_files=list(CP2K_FILES),
        cp2k_shell=CP2K_SHELL,
    )

    geopt = ips.calculators.ASEGeoOpt(
        model=cp2k,
        data=packmol.atoms,
        optimizer="BFGS",
        run_kwargs={"fmax": 0.5},
    )

    # --- Initial train / test split ---------------------------------------
    # 20 randomly selected configurations form the test set; everything the
    # selection excluded becomes the initial training set.
    test_selection = ips.configuration_selection.RandomSelection(
        data=geopt.atoms,
        n_configurations=20,
    )
    train_data = test_selection.excluded_atoms
    test_data = test_selection.atoms

    # --- Active-learning cycles -------------------------------------------
    for cycle in range(3):
        with project.group(name=f"AL_{cycle}") as group:
            # Two Apax models trained on the same data with different
            # configuration files, combined into one ensemble.
            model1, model2 = [
                ips.models.Apax(
                    data=train_data,
                    validation_data=test_data,
                    config=config_path,
                )
                for config_path in ("config/apax_1.yaml", "config/apax_2.yaml")
            ]
            ensemble_model = ips.models.EnsembleModel(models=[model1, model2])

            # MD driven by the ensemble, started from the last (data_id=-1)
            # entry of the geometry-optimisation output.
            md = ips.calculators.ASEMD(
                data=geopt.atoms,
                data_id=-1,
                model=ensemble_model,
                thermostat=thermostat,
                checker_list=[uncertainty_check],
                steps=50000,
                sampling_rate=1,
            )

            # Pick new training configurations from the MD run ...
            train_data_selection = ips.configuration_selection.ThresholdSelection(
                data=md,
                n_configurations=10,
                min_distance=10,
            )

            # ... and new test configurations, excluding the ones already
            # chosen for training.
            test_data_selection = ips.configuration_selection.RandomSelection(
                data=md,
                n_configurations=5,
                exclude_configurations=train_data_selection.selected_configurations,
            )

            # Label the new training configurations with CP2K and append
            # them to the running training set.
            # Original author's note on `wfn_restart_node`: these are not set
            # in cycle 0, so use nodes=[group] to build, or commit afterwards.
            train_labels = ips.calculators.CP2KSinglePoint(
                data=train_data_selection,
                cp2k_files=list(CP2K_FILES),
                cp2k_shell=CP2K_SHELL,
                wfn_restart_node=cp2k,
            )
            train_data += train_labels.atoms

            # Same labelling step for the new test configurations.
            test_labels = ips.calculators.CP2KSinglePoint(
                data=test_data_selection,
                cp2k_files=list(CP2K_FILES),
                cp2k_shell=CP2K_SHELL,
                wfn_restart_node=cp2k,
            )
            test_data += test_labels.atoms

            # Evaluate the ensemble on the (now extended) test set.
            prediction = ips.analysis.Prediction(model=ensemble_model, data=test_data)
            metrics = ips.analysis.PredictionMetrics(data=prediction)

# `group` is the object bound by the final loop iteration, so only the last
# active-learning group is passed to `build`.
project.build(nodes=[group])

Running DVC command: 'stage add --name AL_2_MLModel --force ...'


Running DVC command: 'stage add --name AL_2_MLModel_1 --force ...'
Running DVC command: 'stage add --name AL_2_EnsembleModel --force ...'
Running DVC command: 'stage add --name AL_2_ASEMD --force ...'
Running DVC command: 'stage add --name AL_2_ASEMD_checker_list_0 --force ...'
Running DVC command: 'stage add --name AL_2_ASEMD_thermostat --force ...'
Running DVC command: 'stage add --name AL_2_ConfigurationSelection_1 --force ...'
Running DVC command: 'stage add --name AL_2_ConfigurationSelection_2 --force ...'
Running DVC command: 'stage add --name AL_2_CP2KSinglePoint_1 --force ...'
Running DVC command: 'stage add --name AL_2_CP2KSinglePoint_2 --force ...'
Running DVC command: 'stage add --name AL_2_Prediction --force ...'
Running DVC command: 'stage add --name AL_2_PredictionMetrics --force ...'
