# Load MLatom

In [None]:
# Clone the repository and install the necessary packages
!git clone https://github.com/JakubMartinka/karlsruhe2024.git
!pip3 install mlatom
!pip3 install py3Dmol
!pip3 install mkl
!pip3 install mkl-service
!pip3 install ase

In [None]:
import mlatom as ml

# Train and use models for hydrogen molecule

## KREG

In [None]:
# Build the H2 data set: the molecules are read from the XYZ file, then the
# scalar values from the .dat file are attached to them as the 'energy' property.
molDB = ml.data.molecular_database.from_xyz_file(
    'karlsruhe2024/materials/h2.xyz'
)
molDB.add_scalar_properties_from_file(
    'karlsruhe2024/materials/E_FCI_451.dat',
    'energy',
)

In [None]:
# Optional: inspect the data set visually.
# Rotate the H2 molecule in the viewer, otherwise only the top atom is visible.
molDB.view()

In [None]:
# Define the KREG model. 'energies' is the model file name handed to MLatom;
# the same file name is used again further down when the model is re-loaded.
model = ml.models.kreg(model_file='energies')

In [None]:
# Hold out part of the data for hyperparameter optimization:
# 80 % of the points form the sub-training set and 20 % the validation set.
# Sampling is random and no seed is set in this cell, so the split may differ between runs.
subtraining_molDB, validation_molDB = molDB.split(
    fraction_of_points_in_splits=[0.8, 0.2],
    sampling='random',
)

In [None]:
# Optimize the KREG hyperparameters, then train the final model.
# The timer spans BOTH steps (optimization + final training).
import time

start = time.time()

# Set sigma's `minval` to 2**-4 before the search
# (presumably the lower bound of the search range -- TODO confirm in the MLatom docs).
model.hyperparameters['sigma'].minval = 2**-4

# Search lambda and sigma with Nelder-Mead: models are trained on the
# sub-training set and evaluated on the validation set.
model.optimize_hyperparameters(
    subtraining_molecular_database=subtraining_molDB,
    validation_molecular_database=validation_molDB,
    optimization_algorithm='nelder-mead',
    hyperparameters=['lambda', 'sigma'],
    training_kwargs={'property_to_learn': 'energy'},
    prediction_kwargs=None,
)

# Read back the optimized values and report them.
lmbd, sigma = (model.hyperparameters[key].value for key in ('lambda', 'sigma'))
print(f'Optimized hyperparameters: lambda={lmbd}, sigma={sigma}')

# Train the final model on the complete data set (sub-training + validation points).
model.train(molecular_database=molDB, property_to_learn='energy')

end = time.time()

In [None]:
# Wall-clock seconds between `start` and `end` set in the previous cell; for KREG this
# interval covers the hyperparameter optimization as well as the final training.
print(f'Training time: {end - start} s')

In [None]:
# Initial geometry for the optimization, given as an XYZ-format string:
# two H atoms placed on the z axis, 0.8 apart
# (Angstrom, judging by the unit printed with the bond length below).
mol = ml.data.molecule.from_xyz_string('''2

H             0.0000000000000           0.0000000000000           0.0000000000000
H             0.0000000000000           0.0000000000000           0.8000000000000
''')

In [None]:
# Optional: re-create the KREG model object from the 'energies' model file, e.g. in a
# later session. Not needed if the model was trained above in this session.
# NOTE(review): this rebinds `model`, replacing the object trained above -- presumably
# the 'energies' file holds the same trained model; confirm it was written by train().
model = ml.models.kreg(model_file='energies')

In [None]:
# Run the geometry optimization with the KREG model, using ASE as the optimizer program.
# NOTE(review): the result is read back from `mol`, so this relies on
# optimize_geometry updating the molecule passed via `molecule=` -- TODO confirm.
ml.optimize_geometry(model=model, molecule=mol, program='ASE')
print(mol.get_xyz_string())

In [None]:
# Final H2 bond length from the KREG optimization: distance between atoms 0 and 1
# (the only two atoms in the molecule). Kept in `Rkreg` for the comparisons below.
Rkreg = mol.internuclear_distance(0, 1)
print(f'H2 bond length is {Rkreg} Angstrom')

## TorchANI

In [None]:
# let's do the same but with ANI

In [None]:
# Set up the ANI model with model file 'energies_ani_api.pt' and
# the 'max_epochs' hyperparameter set to 200.
ani = ml.models.ani(
    model_file='energies_ani_api.pt',
    hyperparameters={'max_epochs': 200},
)

# Train on the full data set and time the call
# (`time` was imported in the KREG hyperparameter cell above).
start = time.time()
ani.train(
    molecular_database=molDB,
    property_to_learn='energy',
)
end = time.time()

In [None]:
# Wall-clock seconds spent in ani.train(); `start`/`end` were reset in the previous cell.
print(f'Training time: {end - start} s')

In [None]:
# Geometry optimization with the ANI model.
# Start from the same H2 geometry as in the KREG section, in a fresh molecule object.
molani = ml.data.molecule.from_xyz_string(
    '''2

H             0.0000000000000           0.0000000000000           0.0000000000000
H             0.0000000000000           0.0000000000000           0.8000000000000
'''
)

# Optimize with ASE, then show the resulting geometry.
ml.optimize_geometry(
    model=ani,
    molecule=molani,
    program='ASE',
)
print(molani.get_xyz_string())

In [None]:
# Final H2 bond length from the ANI optimization: distance between atoms 0 and 1.
# Kept in `Rani` for the comparison with KREG below.
Rani = molani.internuclear_distance(0, 1)
print(f'H2 bond length is {Rani} Angstrom')

In [None]:
# Difference between the ANI and KREG bond-length predictions (ANI minus KREG);
# shown as the cell output.
Rani - Rkreg

## MACE (same workflow as above)

In [None]:
# Install MACE
!git clone https://github.com/ACEsuit/mace.git --branch v0.3.2
!pip install ./mace

In [None]:
# Set up the MACE model with model file 'energies_mace.pt' and
# the 'max_num_epochs' hyperparameter set to 50.
mace = ml.models.mace(
    model_file='energies_mace.pt',
    hyperparameters={'max_num_epochs': 50},
)

# Train on the full data set and time the call
# (`time` was imported in the KREG hyperparameter cell above).
start = time.time()
mace.train(
    molecular_database=molDB,
    property_to_learn='energy',
)
end = time.time()

In [None]:
# Wall-clock seconds spent in mace.train(); `start`/`end` were reset in the previous cell.
print(f'Training time: {end - start} s')

In [None]:
# Geometry optimization with the MACE model.
# Initial geometry: the same starting H2 structure as used for KREG and ANI,
# in a fresh molecule object.
molmace = ml.data.molecule.from_xyz_string('''2

H             0.0000000000000           0.0000000000000           0.0000000000000
H             0.0000000000000           0.0000000000000           0.8000000000000
''')
ml.optimize_geometry(model=mace, molecule=molmace, program='ASE')
# Show the geometry of the molecule optimized with MACE.
# (Fix: this used to print `molani`, i.e. the ANI result, due to a copy-paste slip.)
print(molmace.get_xyz_string())

In [None]:
# Final H2 bond length from the MACE optimization: distance between atoms 0 and 1.
# Kept in `Rmace` for the comparison with KREG below.
Rmace = molmace.internuclear_distance(0, 1)
# Fix: report the MACE value just computed (this used to print `Rani`, the ANI bond length).
print(f'H2 bond length is {Rmace} Angstrom')

In [None]:
# Difference between the MACE and KREG bond-length predictions (MACE minus KREG);
# shown as the cell output.
Rmace - Rkreg