In [1]:
import ipsuite as ips

2023-08-03 10:56:27,161 (DEBUG): Welcome to IPS - the Interatomic Potential Suite!


The DFT reference calculations are based on https://www.sciencedirect.com/science/article/pii/S1386142521004455#b0030

In [2]:
import dvc.repo

# Open the DVC repository of the current working directory and pull the data
# of every stage it knows about, one stage at a time. Each stage address is
# printed before the pull so the cell output shows which stage is being fetched.
repo = dvc.repo.Repo()
for stage in repo.index.stages:
    address = stage.addressing
    print(address)
    repo.pull(address)

BASIS_MOLOPT.dvc
GTH_POTENTIALS.dvc
dftd3.dat.dvc
AL_0_ASEMD
AL_0_ASEMD_checker_list_0
AL_0_ASEMD_thermostat
AL_0_BoxScale
AL_0_BoxScale_mapping
AL_0_CP2KSinglePoint_1
AL_0_CP2KSinglePoint_2
AL_0_ConfigurationSelection_1
AL_0_ConfigurationSelection_2
AL_0_EnergyUncertaintyHistogram
AL_0_EnsembleModel
AL_0_ForceDecomposition
AL_0_ForcesUncertaintyHistogram
AL_0_MLModel
AL_0_MLModel_1
AL_0_Prediction
AL_0_PredictionMetrics
AL_1_ASEMD
AL_1_ASEMD_checker_list_0
AL_1_ASEMD_thermostat
AL_1_BoxScale
AL_1_BoxScale_mapping
AL_1_CP2KSinglePoint_1
AL_1_CP2KSinglePoint_2
AL_1_ConfigurationSelection_1
AL_1_ConfigurationSelection_2
AL_1_EnergyUncertaintyHistogram
AL_1_EnsembleModel
AL_1_ForceDecomposition
AL_1_ForcesUncertaintyHistogram
AL_1_MLModel
AL_1_MLModel_1
AL_1_Prediction
AL_1_PredictionMetrics
AL_2_ASEMD
AL_2_ASEMD_checker_list_0
AL_2_ASEMD_thermostat
AL_2_BoxScale
AL_2_BoxScale_mapping
AL_2_CP2KSinglePoint_1
AL_2_CP2KSinglePoint_2
AL_2_ConfigurationSelection_1
AL_2_ConfigurationSelection_2

In [3]:
# Command used to launch the CP2K shell for every CP2K node in this notebook.
#
# The default reproduces the original setup (``mpirun`` with 12 processes and
# the ``cp2k_shell.psmp`` binary of a local conda environment). Because that
# path only exists on one machine, the command can be overridden without
# editing the notebook by setting the IPS_CP2K_SHELL environment variable,
# e.g. IPS_CP2K_SHELL="cp2k_shell.ssmp". An unset or empty variable falls back
# to the default.
import os

cp2k_shell = os.environ.get("IPS_CP2K_SHELL") or (
    "mpirun -np 12 /home/linux38_i1/schaefer/miniconda3/envs/dmso/bin/cp2k_shell.psmp"
)

In [4]:
# --- Shared helper objects for the MD runs in the workflow below -----------

# Langevin thermostat used by every ASEMD node.
thermostat = ips.calculators.LangevinThermostat(
    time_step=0.5,
    temperature=298.15,
    friction=0.01,
)

# Check on the "energy_uncertainty" value with an upper threshold of 2.0;
# handed to the MD runs through their ``checker_list``.
uncertainty_check = ips.analysis.ThresholdCheck(
    value="energy_uncertainty",
    max_value=2.0,
    larger_only=True,
)

# Mapping passed to the BoxScale analyses (no data attached here).
mapping = ips.geometry.BarycenterMapping(data=None)

# Temperature modifier: ramp from the start to the end temperature with
# superimposed oscillations.
temperature_oszillator = ips.calculators.TemperatureOscillatingRampModifier(
    start_temperature=270,  # melting around 290
    end_temperature=450,  # boiling around 460
    temperature_amplitude=150,
    num_oscillations=10,
)

# Box modifier: oscillating cell with three oscillations over the run.
box_oszillator = ips.calculators.BoxOscillatingRampModifier(
    num_oscillations=3,
    cell_amplitude=1,
)

# Build the complete workflow graph: initial DFT data, a first active-learning
# (AL) loop, two bootstrap stages, a second AL loop and a final model.
#
# ``train_data`` and ``test_data`` are grown with ``+=`` throughout the block;
# every model is created from whatever has been accumulated up to that point.
#
# NOTE(review): node names are generated automatically
# (``automatic_node_names=True``). The stage list printed by the DVC pull cell
# (e.g. ``AL_0_CP2KSinglePoint_1``) suggests they are numbered in instantiation
# order, so reordering statements presumably renames stages -- confirm before
# restructuring this cell.
with ips.Project(automatic_node_names=True) as project:
    # Single molecule generated from its SMILES string (CS(=O)C, i.e. DMSO).
    mol = ips.configuration_generation.SmilesToAtoms(smiles="CS(=O)C")

    # Pack 38 copies of the molecule into one box.
    # NOTE(review): density is presumably given in kg/m^3 -- confirm.
    packmol = ips.configuration_generation.Packmol(
        data=[mol.atoms], count=[38], density=1095.2
    )

    # CP2K reference calculator on the packed box. This node is reused below as
    # the model of the DFT geometry optimisations and as ``wfn_restart_node``
    # of every later CP2K single-point node.
    cp2k = ips.calculators.CP2KSinglePoint(
        data=packmol.atoms,
        cp2k_files=["BASIS_MOLOPT", "GTH_POTENTIALS", "dftd3.dat"],
        cp2k_shell=cp2k_shell,
    )

    # Geometry optimisation of the packed box with CP2K (BFGS, fmax=0.5).
    geopt = ips.calculators.ASEGeoOpt(
        model=cp2k,
        data=packmol.atoms,
        optimizer="BFGS",
        run_kwargs={"fmax": 0.5},
    )

    # Randomly pick 20 configurations of the optimisation output as the initial
    # test set; everything that was not picked forms the initial training set.
    test_selection = ips.configuration_selection.RandomSelection(
        data=geopt.atoms, n_configurations=20
    )

    train_data = test_selection.excluded_atoms
    test_data = test_selection.atoms

    # ---- First active-learning loop: groups AL_0 ... AL_5 -----------------
    for cycle in range(6):
        with project.group(name=f"AL_{cycle}") as group:
            # Two Apax models trained on the data collected so far. Cycles 0-4
            # use configs apax_1/apax_2, the last cycle uses apax_3/apax_4.
            model1 = ips.models.Apax(
                data=train_data,
                validation_data=test_data,
                config="config/apax_1.yaml" if cycle < 5 else "config/apax_3.yaml",
            )
            model2 = ips.models.Apax(
                data=train_data,
                validation_data=test_data,
                config="config/apax_2.yaml" if cycle < 5 else "config/apax_4.yaml",
            )

            # Both models are combined into one ensemble model.
            ensemble_model = ips.models.EnsembleModel(models=[model1, model2])

            # MD with the ensemble, using the shared thermostat and the
            # uncertainty check.
            # NOTE(review): data_id=-1 presumably selects the last
            # configuration of ``geopt.atoms`` as the start structure -- confirm.
            md = ips.calculators.ASEMD(
                data=geopt.atoms,
                data_id=-1,
                model=ensemble_model,
                thermostat=thermostat,
                checker_list=[uncertainty_check],
                steps=50000,
                sampling_rate=1,
            )

            # New training candidates: 10 configurations from the MD run.
            train_data_selection = ips.configuration_selection.ThresholdSelection(
                data=md, n_configurations=10, min_distance=10
            )

            # New test candidates: 5 random MD configurations, excluding the
            # ones already selected for training.
            test_data_selection = ips.configuration_selection.RandomSelection(
                data=md,
                n_configurations=5,
                exclude_configurations=train_data_selection.selected_configurations,
            )

            # Evaluate the ensemble on the test set as it is *before* this
            # cycle's additions (test_data is only extended further down).
            prediction = ips.analysis.Prediction(model=ensemble_model, data=test_data)
            metrics = ips.analysis.PredictionMetrics(data=prediction)
            force_decomposition = ips.analysis.ForceDecomposition(data=prediction)

            # Run CP2K on the selected configurations and append the results;
            # the next cycle's models are built from the enlarged sets.
            train_data += ips.calculators.CP2KSinglePoint(
                data=train_data_selection,
                cp2k_files=["BASIS_MOLOPT", "GTH_POTENTIALS", "dftd3.dat"],
                cp2k_shell=cp2k_shell,
                wfn_restart_node=cp2k,
            ).atoms

            test_data += ips.calculators.CP2KSinglePoint(
                data=test_data_selection,
                cp2k_files=["BASIS_MOLOPT", "GTH_POTENTIALS", "dftd3.dat"],
                cp2k_shell=cp2k_shell,
                wfn_restart_node=cp2k,
            ).atoms

            # Uncertainty histograms (forces and energy) over the MD output.
            md_forces_uncertainty = ips.analysis.ForcesUncertaintyHistogram(
                data=md.atoms
            )
            md_energy_uncertainty = ips.analysis.EnergyUncertaintyHistogram(
                data=md.atoms
            )

            # Box-scaling analysis of the ensemble on the optimised structure.
            volume_scan = ips.analysis.BoxScale(
                data=geopt.atoms,
                mapping=mapping,
                model=ensemble_model,
                start=0.9,
                data_id=-1,
            )

    # ---- bootstrap_0: add rotated / translated molecule configurations ----
    with project.group(name="bootstrap_0") as group:
        # Training candidates: 10 rotated + 10 translated configurations
        # generated from ``geopt.atoms`` (seed=1, original not included).
        bootstrap_train_data = (
            ips.bootstrap.RotateMolecules(
                data=geopt.atoms,
                data_id=-1,
                n_configurations=10,
                maximum=10 * 3.1415 / 180,  # max rotation: 10 degrees expressed in radians (pi ~ 3.1415)
                include_original=False,
                seed=1,
            ).atoms
            + ips.bootstrap.TranslateMolecules(
                data=geopt.atoms,
                data_id=-1,
                n_configurations=10,
                maximum=0.2,  # Ang max molecular displacement
                include_original=False,
                seed=1,
            ).atoms
        )

        # Test candidates: 5 rotated + 5 translated configurations with a
        # different seed (2). These nodes are given explicit names.
        bootstrap_test_data = (
            ips.bootstrap.RotateMolecules(
                data=geopt.atoms,
                data_id=-1,
                n_configurations=5,
                maximum=10 * 3.1415 / 180,  # max rotation: 10 degrees expressed in radians (pi ~ 3.1415)
                include_original=False,
                seed=2,
                name="RotateMolecules_test",
            ).atoms
            + ips.bootstrap.TranslateMolecules(
                data=geopt.atoms,
                data_id=-1,
                n_configurations=5,
                maximum=0.2,  # Ang max molecular displacement
                include_original=False,
                seed=2,
                name="TranslateMolecules_test",
            ).atoms
        )

        # Run CP2K on the bootstrapped configurations and append them to the
        # accumulated training / test sets.
        train_data += ips.calculators.CP2KSinglePoint(
            data=bootstrap_train_data,
            cp2k_files=["BASIS_MOLOPT", "GTH_POTENTIALS", "dftd3.dat"],
            cp2k_shell=cp2k_shell,
            wfn_restart_node=cp2k,
        ).atoms

        test_data += ips.calculators.CP2KSinglePoint(
            data=bootstrap_test_data,
            cp2k_files=["BASIS_MOLOPT", "GTH_POTENTIALS", "dftd3.dat"],
            cp2k_shell=cp2k_shell,
            wfn_restart_node=cp2k,
        ).atoms

        # Retrain both models on the enlarged data (configs apax_3 / apax_4).
        model1 = ips.models.Apax(
            data=train_data,
            validation_data=test_data,
            config="config/apax_3.yaml",
        )
        model2 = ips.models.Apax(
            data=train_data,
            validation_data=test_data,
            config="config/apax_4.yaml",
        )

        # This ensemble is also the model of the MD run in ``bootstrap_1``
        # below, because ``ensemble_model`` is not rebound before that.
        ensemble_model = ips.models.EnsembleModel(models=[model1, model2])

        # The MD run for this group is commented out; the MD with this
        # ensemble is defined in the ``bootstrap_1`` group instead.
        # md = ips.calculators.ASEMD(
        #         data=geopt.atoms,
        #         data_id=-1,
        #         model=ensemble_model,
        #         thermostat=thermostat,
        #         checker_list=[uncertainty_check],
        #         steps=50000,
        #         sampling_rate=100,
        #     )

        # Evaluate the ensemble on the (already extended) test set.
        prediction = ips.analysis.Prediction(model=ensemble_model, data=test_data)
        metrics = ips.analysis.PredictionMetrics(data=prediction)

        force_decomposition = ips.analysis.ForceDecomposition(data=prediction)
        volume_scan = ips.analysis.BoxScale(
            data=geopt.atoms,
            mapping=mapping,
            model=ensemble_model,
            start=0.9,
            data_id=-1,
        )

    # ---- bootstrap_1: MD with temperature and box modifiers ---------------
    with project.group(name="bootstrap_1") as group:
        # MD driven by the bootstrap_0 ensemble; in addition to thermostat and
        # uncertainty check, the temperature and box modifiers are applied.
        md = ips.calculators.ASEMD(
            data=geopt.atoms,
            data_id=-1,
            model=ensemble_model,
            thermostat=thermostat,
            checker_list=[uncertainty_check],
            modifier=[temperature_oszillator, box_oszillator],
            steps=10000,
            sampling_rate=100,
        )

        # Chained selections on the MD output: 20 threshold-selected
        # configurations, then 80 random ones from what was excluded (both go
        # to training), then 20 random ones from the remainder (test).
        train_data_selection_1 = ips.configuration_selection.ThresholdSelection(
            data=md, n_configurations=20, min_distance=10
        )
        train_data_selection_2 = ips.configuration_selection.RandomSelection(
            data=train_data_selection_1.excluded_atoms, n_configurations=80
        )
        test_data_selection = ips.configuration_selection.RandomSelection(
            data=train_data_selection_2.excluded_atoms, n_configurations=20
        )

        # Run CP2K on the selections and append the results.
        train_data += ips.calculators.CP2KSinglePoint(
            data=train_data_selection_1.atoms + train_data_selection_2.atoms,
            cp2k_files=["BASIS_MOLOPT", "GTH_POTENTIALS", "dftd3.dat"],
            cp2k_shell=cp2k_shell,
            wfn_restart_node=cp2k,
        ).atoms

        test_data += ips.calculators.CP2KSinglePoint(
            data=test_data_selection.atoms,
            cp2k_files=["BASIS_MOLOPT", "GTH_POTENTIALS", "dftd3.dat"],
            cp2k_shell=cp2k_shell,
            wfn_restart_node=cp2k,
        ).atoms


    # ---- Second active-learning loop: groups AL2_0 and AL2_1 --------------
    for cycle in range(2):
        with project.group(name=f"AL2_{cycle}") as group:
            # Two Apax models with the AL2-specific configs.
            model1 = ips.models.Apax(
                data=train_data,
                validation_data=test_data,
                config="config/apax_al2_1.yaml",
            )
            model2 = ips.models.Apax(
                data=train_data,
                validation_data=test_data,
                config="config/apax_al2_2.yaml",
            )

            # Unlike the earlier stages, this loop combines the models with
            # ``ApaxEnsemble`` rather than ``EnsembleModel``.
            ensemble_model = ips.models.ApaxEnsemble(models=[model1, model2])

            # Evaluate the ensemble on the current test set.
            prediction = ips.analysis.Prediction(model=ensemble_model, data=test_data)
            metrics = ips.analysis.PredictionMetrics(data=prediction)
            force_decomposition = ips.analysis.ForceDecomposition(data=prediction)


            # Starting structure: geometry optimisation with the ensemble on
            # the output of the previous MD run. ``md`` is the bootstrap_1 run
            # in the first cycle and the previous cycle's run afterwards,
            # because ``md`` is rebound further down in this loop body.
            model_geopt = ips.calculators.ASEGeoOpt(
                model=ensemble_model,
                data=md.atoms,
                data_id=-1,
                optimizer="BFGS",
                run_kwargs={"fmax": 0.5},
            )

            # Continue the optimisation with the CP2K reference (looser
            # fmax=1.0); its output is added to the training data below.
            ref_geopt = ips.calculators.ASEGeoOpt(
                model=cp2k,
                data=model_geopt.atoms,
                data_id=-1,
                optimizer="BFGS",
                run_kwargs={"fmax": 1.0},
            )

            # New MD run started from the previous MD output; rebinding ``md``
            # chains the runs from one cycle to the next.
            md = ips.calculators.ASEMD(
                data=md.atoms,
                data_id=-1,
                model=ensemble_model,
                thermostat=thermostat,
                checker_list=[uncertainty_check],
                modifier=[temperature_oszillator, box_oszillator],
                steps=10000,
                sampling_rate=100,
            )

            # Training candidates: 20 threshold-selected MD configurations.
            train_data_selection = ips.configuration_selection.ThresholdSelection(
                data=md, n_configurations=20, min_distance=20
            )

            # Test candidates: 5 random MD configurations not selected above.
            test_data_selection = ips.configuration_selection.RandomSelection(
                data=md,
                n_configurations=5,
                exclude_configurations=train_data_selection.selected_configurations,
            )

            # Extend the training data with the CP2K optimisation output and
            # with CP2K single points on the selected MD configurations.
            train_data += ref_geopt.atoms
            train_data += ips.calculators.CP2KSinglePoint(
                data=train_data_selection.atoms,
                cp2k_files=["BASIS_MOLOPT", "GTH_POTENTIALS", "dftd3.dat"],
                cp2k_shell=cp2k_shell,
                wfn_restart_node=cp2k,
            ).atoms

            test_data += ips.calculators.CP2KSinglePoint(
                data=test_data_selection.atoms,
                cp2k_files=["BASIS_MOLOPT", "GTH_POTENTIALS", "dftd3.dat"],
                cp2k_shell=cp2k_shell,
                wfn_restart_node=cp2k,
            ).atoms


    # ---- Final model on all accumulated data ------------------------------
    with project.group(name="final") as final:
        # Single Apax model (no ensemble) trained with the final config.
        model = ips.models.Apax(
            data=train_data,
            validation_data=test_data,
            config="config/apax_final.yaml",
        )

        # Evaluate the final model on the full accumulated test set.
        prediction = ips.analysis.Prediction(model=model, data=test_data)
        metrics = ips.analysis.PredictionMetrics(data=prediction)

        force_decomposition = ips.analysis.ForceDecomposition(data=prediction)
        volume_scan = ips.analysis.BoxScale(
            data=geopt.atoms, mapping=mapping, model=model, start=0.9, data_id=-1
        )


# NOTE(review): ``group`` is the last object bound to that name, i.e. the
# ``AL2_1`` group from the loop above; the final group is bound to ``final``.
# The trailing comment presumably marks ``final`` as the alternative target
# (``nodes=[final]``) -- confirm which one is intended before running.
project.build(nodes=[group]) # final

2023-08-03 11:06:06.902140: E external/xla/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: forward compatibility was attempted on non supported HW
2023-08-03 11:06:06.906246: E external/xla/xla/stream_executor/cuda/cuda_diagnostics.cc:312] kernel version 470.182.3 does not match DSO version 470.199.2 -- cannot find working devices in this configuration
No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)
2023-08-03 11:06:14.476981: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-08-03 11:06:14.477424: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directo