In [1]:
import sys
import os 

# Put "<parent of the current working directory>/src" on the import path
# (presumably the in-repo SchNetPack sources that the imports below rely on).
schnetpack_dir = os.path.dirname(os.getcwd())
# os.path.join picks the correct separator on every OS; a literal "\\src"
# only yields a valid path on Windows.
sys.path.insert(1, os.path.join(schnetpack_dir, "src"))

import schnetpack as spk
from schnetpack.data import ASEAtomsData
import schnetpack.transform as trn

import torch
import torchmetrics
import pytorch_lightning as pl


In [8]:
from schnetpack.datasets import rMD17
from schnetpack.transform import ASENeighborList

# rMD17 database file below the notebook's working directory. The components
# are joined one by one so the path is valid on any OS, not only on Windows.
filepath_db = os.path.join(os.getcwd(), 'data', 'rMD17', 'rMD17.db')

# Data module for the ethanol subset of rMD17.
ethanol_data = rMD17(
    filepath_db,
    molecule='ethanol',
    batch_size=10,
    num_train=100_000,
    num_val=10_000,
    transforms=[
        trn.ASENeighborList(cutoff=5.),
        trn.RemoveOffsets(rMD17.energy, remove_mean=True, remove_atomrefs=False),
        trn.CastTo32()
    ],
    num_workers=1,
    # Pinned host memory only helps when batches are copied to a GPU, so follow
    # the actual hardware instead of hard-coding True.
    pin_memory=torch.cuda.is_available(),
)


# Prepare the database file and build the train/validation/test splits.
ethanol_data.prepare_data()
ethanol_data.setup()


100%|██████████| 10000/10000 [05:10<00:00, 32.19it/s]


## Run define_hessian_database.py here

In [2]:
# Databases with energy/gradient/Hessian reference data for 1000 ethanol
# structures (per the directory name). Components are joined individually so
# the paths are valid on any OS, not only on Windows.
hessian_data_dir = os.path.join(os.getcwd(), 'data', 'ene_grad_hess_1000eth')
filepath_hessian_db = os.path.join(hessian_data_dir, 'data.db')
filepath_no_hessian_db = os.path.join(hessian_data_dir, 'data-no-hessian.db')

# Generic data module over the Hessian database with an 800/100/100 split.
hessianData = spk.data.AtomsDataModule(
    filepath_hessian_db,
    distance_unit="Ang",
    property_units={"energy": "Hartree",
                    "forces": "Hartree/Bohr",
                    "hessian": "Hartree/Bohr/Bohr"
                    },
    batch_size=10,

    transforms=[
        trn.ASENeighborList(cutoff=5.),
        trn.RemoveOffsets("energy", remove_mean=True, remove_atomrefs=False),
        trn.CastTo32()
    ],

    num_train=800,
    num_val=100,
    num_test=100,

    # Pinned host memory only helps when batches are copied to a GPU, so follow
    # the actual hardware instead of hard-coding True.
    pin_memory=torch.cuda.is_available(),

)
hessianData.prepare_data()
hessianData.setup()

100%|██████████| 80/80 [00:14<00:00,  5.43it/s]


In [4]:
# Report the size of the full dataset and of each split, then list the
# properties stored in the database.
split_sizes = (
    ('Number of reference calculations:', hessianData.dataset),
    ('Number of train data:', hessianData.train_dataset),
    ('Number of validation data:', hessianData.val_dataset),
    ('Number of test data:', hessianData.test_dataset),
)
for label, subset in split_sizes:
    print(label, len(subset))

print('Available properties:')
for property_name in hessianData.dataset.available_properties:
    print('-', property_name)
    

Number of reference calculations: 1000
Number of train data: 800
Number of validation data: 100
Number of test data: 100
Available properties:
- energy
- forces
- hessian


In [5]:
# --- Hyperparameters shared by the representation and the output heads ---
cutoff = 5.
n_atom_basis = 30

# --- Input stage and PaiNN representation ---
pairwise_distance = spk.atomistic.PairwiseDistances()  # pairwise distances between atoms
radial_basis = spk.nn.GaussianRBF(n_rbf=20, cutoff=cutoff)
cutoff_function = spk.nn.CosineCutoff(cutoff)
paiNN = spk.representation.PaiNN(
    n_atom_basis=n_atom_basis,
    n_interactions=3,
    radial_basis=radial_basis,
    cutoff_fn=cutoff_function,
)

# --- Output heads (created in this order: energy, forces, polarizability) ---
pred_energy = spk.atomistic.Atomwise(n_in=n_atom_basis, output_key="energy")
pred_forces = spk.atomistic.Forces(energy_key="energy", force_key="forces")
# NOTE(review): the dataset listed earlier exposes only energy, forces and
# hessian, and the training task is built without a polarizability output, so
# this head appears to receive no training signal - confirm it is intended.
pred_polarizability = spk.atomistic.Polarizability(
    n_in=n_atom_basis,
    polarizability_key="polarizability",
)
output_heads = [pred_energy, pred_forces, pred_polarizability]

# Post-processing applied to predictions: cast to 64 bit, then add the energy
# mean back (the counterpart of the RemoveOffsets transform used on the data).
prediction_postprocessors = [
    trn.CastTo64(),
    trn.AddOffsets("energy", add_mean=True, add_atomrefs=False),
]

nnpot = spk.model.NeuralNetworkPotential(
    representation=paiNN,
    input_modules=[pairwise_distance],
    output_modules=output_heads,
    postprocessors=prediction_postprocessors,
)

def _mse_output(name, loss_weight):
    """Return a ``spk.task.ModelOutput`` for ``name`` with its own MSE loss and MAE metric.

    Args:
        name: key of the predicted property this output refers to.
        loss_weight: weight of this output's loss term.
    """
    return spk.task.ModelOutput(
        name=name,
        loss_fn=torch.nn.MSELoss(),
        loss_weight=loss_weight,
        metrics={"MAE": torchmetrics.MeanAbsoluteError()},
    )


output_energy = _mse_output("energy", 0.01)
output_forces = _mse_output("forces", 0.99)
# NOTE(review): defined with weight 0 but never handed to the task below.
output_polarizability = _mse_output("polarizability", 0)


# Training task: only the energy and force outputs contribute to loss and metrics.
task = spk.task.AtomisticTask(
    model=nnpot,
    outputs=[output_energy, output_forces],
    optimizer_cls=torch.optim.AdamW,
    optimizer_args={"lr": 1e-4},
)

# Output directory for logs and the exported model. The components are joined
# individually so the path is valid on any OS, not only on Windows.
# NOTE(review): the data cell reads from "data/ene_grad_hess_1000eth" while
# this writes below "maxim/data/..." - confirm the extra "maxim" level is intended.
directory_training = os.path.join(
    os.getcwd(), "maxim", "data", "ene_grad_hess_1000eth"
)
filepath_model = os.path.join(directory_training, "best_inference_model")

logger = pl.loggers.TensorBoardLogger(save_dir=directory_training)
callbacks = [
    # Keep only the single best model, ranked by the monitored validation loss.
    spk.train.ModelCheckpoint(
        model_path=filepath_model,
        save_top_k=1,
        monitor="val_loss"
    )
]

trainer = pl.Trainer(
    callbacks=callbacks,
    logger=logger,
    default_root_dir=directory_training,
    max_epochs=5, # for testing, we restrict the number of epochs
)

# Train on the Hessian data module (train/validation loaders come from it).
trainer.fit(task, datamodule=hessianData)

c:\Users\Maxim\AppData\Local\Programs\Python\Python311\Lib\site-packages\pytorch_lightning\utilities\parsing.py:199: Attribute 'model' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['model'])`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type                   | Params
---------------------------------------------------
0 | model   | NeuralNetworkPotential | 43.1 K
1 | outputs | ModuleList             | 0     
---------------------------------------------------
43.1 K    Trainable params
0         Non-trainable params
43.1 K    Total params
0.173     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\Maxim\AppData\Local\Programs\Python\Python311\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


                                                                           

c:\Users\Maxim\AppData\Local\Programs\Python\Python311\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Epoch 4: 100%|██████████| 80/80 [00:54<00:00,  1.47it/s, v_num=14, val_loss=0.000433]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 80/80 [00:54<00:00,  1.46it/s, v_num=14, val_loss=0.000433]


In [14]:
from ase import Atoms

# Inference runs on the CPU here; use torch.device("cuda") instead for a GPU.
device = "cpu"

# Load the model file written via the checkpoint callback's `model_path`.
# It is used below as a callable model, i.e. a fully pickled module, so
# `weights_only=False` must be explicit: PyTorch >= 2.6 defaults to True and
# then refuses to unpickle it. Only load files from a trusted source this way.
best_model = torch.load(filepath_model, map_location=device, weights_only=False)

# Converter from ASE atoms to model inputs; reuse the `cutoff` the model was
# built with so the neighbor list cannot drift from the model definition.
converter = spk.interfaces.AtomsConverter(
    neighbor_list=trn.ASENeighborList(cutoff=cutoff), dtype=torch.float32, device=device
)


# Build an ASE Atoms object from the first structure of the test split.
structure = hessianData.test_dataset[0]
atoms = Atoms(
    numbers=structure[spk.properties.Z], positions=structure[spk.properties.R]
)

# Convert atoms to SchNetPack inputs and perform the prediction.
inputs = converter(atoms)
results = best_model(inputs)

print(results)

{'forces': tensor([[ 0.0153,  0.0014, -0.0916],
        [-0.0186,  0.0672,  0.0408],
        [ 0.0394,  0.0225, -0.0069],
        [ 0.0048,  0.0030,  0.0374],
        [-0.0145,  0.0242,  0.0312],
        [-0.0271, -0.0990,  0.0203],
        [ 0.0750, -0.0212, -0.0069],
        [-0.0466,  0.0160, -0.0440],
        [-0.0278, -0.0142,  0.0196]], dtype=torch.float64,
       grad_fn=<ToCopyBackward0>), 'polarizability': tensor([[[-0.1886, -0.0071,  0.0048],
         [-0.0071, -0.1638,  0.0053],
         [ 0.0048,  0.0053, -0.1917]]], dtype=torch.float64,
       grad_fn=<ToCopyBackward0>), 'energy': tensor([-154.6937], dtype=torch.float64, grad_fn=<AddBackward0>)}


In [26]:
# Show what `hessianData` currently refers to (no throwaway alias needed).
# NOTE(review): the recorded output shows an ASEAtomsData object although the
# data cell assigns an AtomsDataModule - the output looks stale; re-run from a
# fresh kernel to confirm.
print(hessianData)

<schnetpack.data.atoms.ASEAtomsData object at 0x000001490E82D690>
