In [None]:
%%capture install
try:
  import imlms
  print('Already installed')
except:
  %pip install git+https://github.com/Mads-PeterVC/imlms

In [None]:
# Show the last line of the captured install output. Guard against empty
# output so that a silent install does not raise an IndexError here.
install_lines = install.stdout.splitlines()
print(install_lines[-1] if install_lines else 'No install output captured')

# Descriptors

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell is executed, so edits to library code are picked up.
%reload_ext autoreload
%autoreload 2

In [None]:
import torch
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
import numpy as np
from imlms.potentials.load_carbon_data import get_carbon_cluster_data, get_atomic_dataset, get_invariances_examples, to_atoms
from agox.utils.plot import plot_atoms, plot_cell
from tqdm import tqdm

### Example: Atomic Descriptors

In [None]:
def atomic_fingerprint(positions, cutoff=5.0, n=16, sigma=0.5):
    """Compute a radial fingerprint for every atom of a configuration.

    For each atom, Gaussians centred on an evenly spaced radial grid are
    evaluated at the distances to all atoms and summed.

    Parameters
    ----------
    positions : array of shape (n_atoms, n_dims)
        Cartesian coordinates of the atoms.
    cutoff : float
        Upper end of the radial grid.
    n : int
        Number of grid points, i.e. the fingerprint length.
    sigma : float
        Width parameter of the Gaussians.

    Returns
    -------
    numpy.ndarray of shape (n_atoms, n)
        One fingerprint row per atom.

    Notes
    -----
    The sum runs over all atoms, including the atom itself (distance 0).
    The cosine taper and the ``< cutoff`` mask act on the grid centres, not
    on the interatomic distances, so the last feature (centre == cutoff) is
    always zero.
    """
    n_atoms = len(positions)
    # Trailing axis of length 1 lets the distances broadcast against the grid.
    distances = cdist(positions, positions).reshape(n_atoms, n_atoms, 1)
    centers = np.linspace(0, cutoff, n).reshape(1, 1, n)

    gaussians = np.exp(-(distances - centers)**2 / sigma**2)
    tapered = gaussians * np.cos(np.pi/2 * centers / cutoff)
    masked = tapered * (centers < cutoff)

    # Collapse the neighbour axis: one vector of length n per atom.
    return masked.sum(axis=1)

In [None]:
# Load the two example structures used in this section.
# NOTE(review): per the variable names these are a chain and a ring of atoms.
chain, ring = get_invariances_examples()

In [None]:
# One panel per example structure, with every atom annotated by its index.
labels = ['Chain', 'Ring']
fig, axes = plt.subplots(1, 2, figsize=(6, 3))

for panel_index, atoms in enumerate([chain, ring]):
    ax = axes[panel_index]
    plot_cell(ax, atoms.cell, collection_kwargs={'alpha': 0})
    plot_atoms(ax, atoms)
    # Write the atom index on top of each atom so the text below can refer to it.
    for atom in atoms:
        ax.text(atom.x, atom.y, atom.index, fontsize=8, color='black', ha='center', va='center')
    ax.set_title(labels[panel_index])

Previously we have used descriptors of configurations, but we can also make 
descriptors of atoms - or as it is typically called descriptors of the atomic 
environment.

The `atomic_fingerprint` function above computes such atomic environment descriptors.

The cell below computes these fingerprints for the two example structures.

In [None]:
# Fingerprints of both examples, using a finer radial grid (n=32) and
# narrower Gaussians (sigma=0.25) than the function defaults.
ring_fingerprints = atomic_fingerprint(ring.positions, n=32, sigma=0.25)
chain_fingerprints = atomic_fingerprint(chain.positions, n=32, sigma=0.25)

# Each array has one row per atom and one column per grid point.
print("Ring", ring_fingerprints.shape)
print("Chain", chain_fingerprints.shape)

From the shapes we see that for each example structure we have 6 vectors, corresponding to the number of atoms, with 32 entries each (one per radial grid point, since we passed `n=32`).

In [None]:
def dot_product_similarity(v1, v2):
    """Return the normalised dot product (cosine similarity) of two vectors.

    The result is ``v1 . v2 / (|v1| |v2|)``. No special handling is done for
    zero-length vectors, for which the division is by zero.
    """
    overlap = np.dot(v1, v2)
    scale = np.linalg.norm(v1) * np.linalg.norm(v2)
    return overlap / scale

Like before we can use the dot product similarity measure to compare the different atomic descriptors.
From the figure above we can tell that 

- For the chain, atoms 4 and 5 are at either end of the chain. These are very similar environments, so we would expect the fingerprints of these two atoms to be very similar.

- Atoms 1 and 2 of the chain are both in the center, so we expect them to be very similar.

- Atom 1 of the chain is in the middle which is quite different from atom 4 at the end, so the similarity between their descriptors should be lower.

- All atoms of the ring structure are similar, because of its symmetry, so the similarity between environments should be high. 

In [None]:
# Similarities for the three chain atom pairs discussed above:
# both ends (4, 5), both centre atoms (1, 2), and centre versus end (1, 4).
chain_45 = dot_product_similarity(*chain_fingerprints[[4, 5]])
chain_12 = dot_product_similarity(*chain_fingerprints[[1, 2]])
chain_14 = dot_product_similarity(*chain_fingerprints[[1, 4]])

print(f"Chain 4-5 {chain_45:0.4f}")

print(f"Chain 1-2 {chain_12:0.4f}")

print(f"Chain 1-4 {chain_14:0.4f}")

As before we can make a similarity matrix to get a visual representation of the similarity. 

In [None]:
# Pairwise similarity matrices between atomic fingerprints (6 atoms each).
# Entry [i, j] compares atom i of the first structure with atom j of the second.
D_ring = np.array([
    [dot_product_similarity(ring_fingerprints[i], ring_fingerprints[j]) for j in range(6)]
    for i in range(6)
])
D_chain = np.array([
    [dot_product_similarity(chain_fingerprints[i], chain_fingerprints[j]) for j in range(6)]
    for i in range(6)
])
# Rows index ring atoms, columns index chain atoms.
D_ring_vs_chain = np.array([
    [dot_product_similarity(ring_fingerprints[i], chain_fingerprints[j]) for j in range(6)]
    for i in range(6)
])

In [None]:
fig, ax = plt.subplots(1, 3, figsize=(9, 4), layout='constrained')

# All three panels use the same colour limits, so the single colorbar
# (built from the first image) is valid for every panel.
c = ax[0].imshow(D_ring, cmap='viridis', vmin=0.9, vmax=1.0)
ax[0].set_title('Ring')

ax[1].imshow(D_chain, cmap='viridis', vmin=0.9, vmax=1.0)
ax[1].set_title('Chain')

ax[2].imshow(D_ring_vs_chain, cmap='viridis', vmin=0.9, vmax=1.0)
ax[2].set_title('Ring vs Chain')

colorbar = fig.colorbar(c, ax=ax, orientation='horizontal')

# The first TWO panels compare a structure with itself, so both of their
# axes are plain atom indices (a slice of 0:1 would label only the ring panel).
for a in ax[0:2]:
    a.set_xlabel('Atom index')
    a.set_ylabel('Atom index')

# In the mixed matrix, rows (y) are ring atoms and columns (x) are chain atoms.
ax[2].set_xlabel('Chain Atom index')
ax[2].set_ylabel('Ring Atom index')

Try to understand why they look like this

You can also play around with the parameters `n` and `sigma` for the `atomic_fingerprint`.

### Exercise: Learning atomic properties

With descriptors of atomic environments we can build models that learn atomic properties. 

For illustrative purposes we will build a model that predicts the number of close neighbours an atom has.

The cell below loads the dataset.

In [None]:
# Load Cartesian coordinates and per-atom targets for the 8-atom dataset.
# NOTE(review): `threshold=2.0` is presumably the distance below which two
# atoms count as neighbours -- confirm against imlms.
X_cart, Y = get_atomic_dataset(8, threshold=2.0)

# Stack the per-configuration fingerprints into one (atoms, features) matrix:
# every atom, from every configuration, becomes one training example.
X_fingerprint = torch.vstack([torch.tensor(atomic_fingerprint(x)).float() for x in X_cart])
# Flatten the targets to one value per atom, in the same order as the rows above.
y_tensor = torch.tensor(Y).flatten().float()

print(f"{X_cart.shape = }")
print(f"{X_fingerprint.shape = }")
print(f"{y_tensor.shape = }")

In [None]:
sz = 2  # scale factor for the figure size (5*sz by 3*sz for the 3x5 grid)
fig, axes = plt.subplots(3, 5, figsize=(5*sz, 3*sz), layout='constrained')

# Show the first 15 configurations; each atom is labelled with its target value.
for example_index, ax in enumerate(axes.flat):
    atoms = to_atoms(X_cart[example_index])
    targets = Y[example_index]

    plot_cell(ax, atoms.cell, collection_kwargs={'alpha': 0})
    plot_atoms(ax, atoms)

    for i, atom in enumerate(atoms):
        ax.text(atom.x, atom.y, targets[i], fontsize=8, color='black', ha='center', va='center')

Now we define a model, very much like we have done before

In [None]:
class CoordinationNumberModel(torch.nn.Module):
    """Small fully connected network mapping an atomic descriptor to one number.

    Architecture: ``vector_dim -> hidden_dim -> hidden_dim -> 1`` with ReLU
    activations after the first two linear layers.
    """

    def __init__(self, vector_dim, hidden_dim=32):
        """Build the network.

        Parameters
        ----------
        vector_dim : int
            Length of the input descriptor.
        hidden_dim : int
            Width of the two hidden layers.
        """
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Linear(vector_dim, hidden_dim),  # vector_dim -> hidden_dim
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, hidden_dim),  # hidden_dim -> hidden_dim
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, 1),           # hidden_dim -> 1
        )

    def forward(self, x):
        """Return the prediction for each row of ``x``; output shape (..., 1)."""
        return self.net(x)

Now we would like to train our model. 

We will make a change compared to the training loop that we have used before, 
motivated by us having a lot of data now. 

Previously we have calculated the gradient of the loss calculated across all of our data. 
When we have relatively few training examples this is fine. This is standard gradient descent.

Instead we will now use **stochastic** gradient descent, where the stochastic (random) element 
comes from computing the gradient over smaller subsets of the data rather than the full dataset 
for each step. 

In [None]:
from torch.utils.data import DataLoader, TensorDataset

def training_loop(model, X_train, y_train, epochs=1000, lr=1e-3, batch_size=64):
    """Train ``model`` in place with mini-batch (stochastic) gradient descent.

    Parameters
    ----------
    model : torch.nn.Module
        Called as ``model(xb)``; its output is flattened before the loss.
    X_train : torch.Tensor
        Inputs, one example per row.
    y_train : torch.Tensor
        Targets, one value per example.
    epochs : int
        Number of complete passes through the dataset.
    lr : float
        Learning rate of the Adam optimizer.
    batch_size : int
        Number of examples per gradient step.

    Returns
    -------
    None
        The model parameters are updated in place.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()

    # Create a DataLoader: helps us iterate over the data in shuffled batches.
    # The batch size must come from the argument (it was previously fixed at 64,
    # so the caller's choice had no effect).
    loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)

    for epoch in tqdm(range(epochs)): # An epoch is a complete pass through the dataset.
        for xb, yb in loader: # xb and yb are the batches of X_train and y_train.

            optimizer.zero_grad()  # Gradients accumulate by default; reset per batch.
            y_pred = model(xb)
            loss = criterion(y_pred.flatten(), yb)
            loss.backward()
            optimizer.step()

In [None]:
# Size the input layer from the data, so changing the fingerprint length `n`
# does not require editing this cell.
model = CoordinationNumberModel(vector_dim=X_fingerprint.shape[-1])

# NOTE: this needs the `training_loop(model, X, y, ...)` defined in the cell
# above. Later cells redefine `training_loop` with a different signature, so
# re-run the cell above first when executing out of order.
training_loop(model, X_fingerprint, y_tensor, epochs=100, lr=1e-2, batch_size=128)

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(3, 3))

# Detach from the autograd graph before converting to NumPy for plotting.
y_pred = model(X_fingerprint).detach().numpy().flatten()

# Parity-style scatter: a perfect model would put every point on the diagonal.
ax.scatter(y_tensor, y_pred, s=1)
ax.set_xlabel('True number of neighbours')
ax.set_ylabel('Predicted number of neighbours')

You won't get perfect predictions, but you should get something that has a decent correlation.

Probably a better way of making a model for this property would be a classification model - because the 
predictions are discrete. (So if you want to, you can make such a model using code from the previous exercise about classification.)

We can easily use the model for atoms in configurations with different sizes. 

Use the model on a different dataset with more atoms, loaded below.

In [None]:
# Same dataset loader and threshold as before, but for configurations with 9 atoms.
X_cart_9, Y_9 = get_atomic_dataset(9, threshold=2.0)

In [None]:
# Make the fingerprints
# Make predictions with the model
# Plot the predictions against the true values

### Example: Atomic energy model.

Now we will build a model for learning the total energy of atomic configurations using these atomic descriptors.

To do so, we will build a model that learns an "atomic energy", using the ansatz that the total energy of a 
configuration can be approximated as

$$
E_{tot} = \sum_{i}^N E_i
$$

Where $E_i$ is the local energy of the i'th atom. 

The cell below defines a neural network for this task, note the `forward` method is more 
complicated than we have seen before so some explanation is in order. 
First, the inputs to forward are

- `x`: This is a matrix of atomic descriptors where each row is an atom and the columns are the features. Or in other words it has shape (number of atoms, number of features).
- `config_indices`: This is an array with (number of atoms)-entries, that denotes which configuration this atom corresponds to. 
- `print_shapes`: This is just a switch to be used to print the shapes of the internal parameters.
- `return_atomic_energy`: Switch to return atomic energies in addition to the total energy.

With that we can describe what happens in the `forward` method

1. The atomic energies are predicted by passing the atomic descriptors through the neural network.

2. The total energy is computed as the sum of the atomic energy for each configuration. The `batch_sum`-method 
does this by summing together the atomic energy terms that have the same number in the `config_indices`-array.

In [None]:
class NeuralNetworkModel(torch.nn.Module):
    """Atomic-energy model: total energy is the sum of predicted atomic energies.

    A fully connected network (``vector_dim -> hidden_dim -> hidden_dim -> 1``
    with SiLU activations) maps each atomic descriptor to an atomic energy;
    the atomic energies are then summed per configuration.
    """

    def __init__(self, vector_dim, hidden_dim=32):
        """Build the network.

        Parameters
        ----------
        vector_dim : int
            Length of one atomic descriptor.
        hidden_dim : int
            Width of the two hidden layers.
        """
        super().__init__()
        layers = []
        layers.append(torch.nn.Linear(vector_dim, hidden_dim)) # vector_dim -> hidden_dim
        layers.append(torch.nn.SiLU())
        layers.append(torch.nn.Linear(hidden_dim, hidden_dim)) # hidden_dim -> hidden_dim
        layers.append(torch.nn.SiLU())
        layers.append(torch.nn.Linear(hidden_dim, 1)) # hidden_dim -> 1
        self.net = torch.nn.Sequential(*layers)

    def batch_sum(self, atomic_energies, config_indices):
        """Sum atomic energies that share the same configuration index.

        Parameters
        ----------
        atomic_energies : torch.Tensor
            One entry (or row) per atom; rows with trailing dimensions are
            summed over those dimensions.
        config_indices : torch.Tensor
            1-D integer tensor giving the configuration id of each atom. The
            ids may be arbitrary integers; they need not be 0..n-1.

        Returns
        -------
        torch.Tensor of shape (n_configs,)
            Total energy per configuration, ordered by ascending
            configuration id.
        """
        # Map the (possibly non-contiguous) ids onto slots 0..n_configs-1.
        # Looping over range(n_configs) and comparing with the raw ids would
        # silently give zeros whenever the ids are not exactly 0..n_configs-1.
        config_ids, slot = torch.unique(config_indices, return_inverse=True)

        per_atom = atomic_energies
        if per_atom.dim() > 1:
            per_atom = per_atom.flatten(start_dim=1).sum(dim=1)

        total_energy = torch.zeros(config_ids.shape[0], dtype=per_atom.dtype, device=per_atom.device)
        # Out-of-place scatter-add keeps the operation differentiable.
        return total_energy.index_add(0, slot, per_atom)

    def forward(self, x, config_indices, print_shapes=False, return_atomic_energy=False):
        """Predict total energies (and optionally atomic energies).

        Parameters
        ----------
        x : torch.Tensor of shape (n_atoms, vector_dim)
            Atomic descriptors, one row per atom.
        config_indices : torch.Tensor of shape (n_atoms,)
            Configuration id of each atom.
        print_shapes : bool
            If True, print the shapes of input and outputs.
        return_atomic_energy : bool
            If True, return ``(total_energy, atomic_energy)`` instead of only
            the total energy.
        """
        atomic_energy = self.net(x)
        total_energy = self.batch_sum(atomic_energy, config_indices)

        if print_shapes:
            print("Input", x.shape)
            print("Atomic Energy", atomic_energy.shape)
            print("Total Energy", total_energy.shape)

        if not return_atomic_energy:
            return total_energy
        else:
            return total_energy, atomic_energy

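The next three cells define a full-batch training loop, a mini-batch version of it, and a function to get the dataset 
in an appropriate format. Note that both training loops are called `training_loop`, so the mini-batch version replaces the full-batch one once its cell has been run.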

In [None]:
def training_loop(model, X_train, index_train, y_train, epochs=200, lr=0.01):
    """Full-batch training of an energy model with Adam and an MSE loss.

    Each epoch performs exactly one gradient step using the whole dataset.

    Parameters
    ----------
    model : torch.nn.Module
        Called as ``model.forward(X_train, index_train)``.
    X_train : torch.Tensor
        Atomic descriptors, one row per atom.
    index_train : torch.Tensor
        Configuration index of each row of ``X_train``.
    y_train : torch.Tensor
        Target energy per configuration.
    epochs : int
        Number of gradient steps.
    lr : float
        Learning rate of the Adam optimizer.

    Returns
    -------
    torch.Tensor of shape (epochs,)
        The recorded loss value for every epoch.

    NOTE(review): the recorded value is the MSE divided by ``len(X_train)``
    (the number of descriptor rows) -- confirm this normalisation is intended.
    NOTE(review): a later cell defines another ``training_loop``; after running
    the notebook top to bottom that later definition is the one in effect.
    """
    loss_fn = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    n_rows = len(X_train)
    loss_per_epoch = torch.zeros(epochs)

    for step in tqdm(range(epochs)):
        # Start every step from clean gradients.
        optimizer.zero_grad()

        # One forward pass over the complete dataset.
        predicted = model.forward(X_train, index_train).squeeze()
        loss = loss_fn(predicted, y_train)

        # Backpropagate and apply the parameter update.
        loss.backward()
        optimizer.step()

        loss_per_epoch[step] = loss.item() / n_rows

    return loss_per_epoch

In [None]:
def training_loop(model, X_train, index_train, y_train, epochs=200, lr=0.01, batch_size=32):
    """Mini-batch training of an energy model with Adam and an MSE loss.

    Batches are formed over *configurations*: each batch contains all atoms of
    ``batch_size`` randomly chosen configurations.

    Parameters
    ----------
    model : torch.nn.Module
        Called as ``model(X_batch, local_index)`` where ``local_index`` labels
        the configurations of the batch as 0..k-1; must return one energy per
        configuration in that order.
    X_train : torch.Tensor
        Atomic descriptors, one row per atom.
    index_train : torch.Tensor
        Configuration index of each row of ``X_train``; also used to index
        ``y_train``.
    y_train : torch.Tensor
        Target energy per configuration.
    epochs : int
        Number of complete passes through the configurations.
    lr : float
        Learning rate of the Adam optimizer.
    batch_size : int
        Number of configurations per gradient step.

    Returns
    -------
    torch.Tensor of shape (epochs,)
        Mean batch loss of every epoch.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = torch.nn.MSELoss()

    loss_per_epoch = torch.zeros(epochs)

    unique_index_train = torch.unique(index_train)

    loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(unique_index_train),
        batch_size=batch_size, shuffle=True
    )
    n_batches = max(len(loader), 1)  # avoid 0/0 when there is no data

    for epoch in tqdm(range(epochs)): # Loop over the dataset multiple times
        for (config_batch,) in loader:

            # Select all atoms that belong to the configurations of this batch.
            mask = torch.isin(index_train, config_batch)
            X_batch = X_train[mask]

            # Relabel the selected configurations as 0..k-1 (ascending id) and
            # pick the targets in that same order, so that prediction i and
            # target i always refer to the same configuration. (The shuffled
            # order of `config_batch` does not match the order of the model
            # output.)
            batch_ids, local_index = torch.unique(index_train[mask], return_inverse=True)
            y_batch = y_train[batch_ids]

            # Gradients accumulate by default, so reset them for every batch.
            optimizer.zero_grad()

            # reshape(-1) keeps a 1-D result even for a single-configuration batch.
            E_batch = model(X_batch, local_index).reshape(-1)
            # Match the target dtype to the prediction to avoid dtype errors.
            loss = loss_fn(E_batch, y_batch.to(E_batch.dtype))

            # Compute gradient and update
            loss.backward()         # Compute the gradient
            optimizer.step()        # Update the parameters
            loss_per_epoch[epoch] += loss.item()

        loss_per_epoch[epoch] /= n_batches

    return loss_per_epoch

In [None]:
def get_dataset(n_atoms, max_examples=None):
    """Build fingerprints, energies and configuration indices for training.

    Parameters
    ----------
    n_atoms : iterable of int
        Cluster sizes to load; each is passed to ``get_carbon_cluster_data``.
    max_examples : int or None
        Forwarded unchanged to ``get_carbon_cluster_data``.

    Returns
    -------
    fingerprints : torch.Tensor
        Atomic fingerprints of all configurations stacked row-wise (float32).
    energies : torch.Tensor
        One energy per configuration, in loading order.
    config_indices : torch.Tensor (int64)
        For every fingerprint row, the running index of the configuration it
        belongs to (0 for the first loaded configuration, 1 for the next, ...).
    """
    fingerprint_blocks = []
    atoms_per_config = []
    energy_values = []

    for cluster_size in n_atoms:
        X, E = get_carbon_cluster_data(cluster_size, max_examples=max_examples)
        for x in X:
            fingerprint_blocks.append(torch.tensor(atomic_fingerprint(x)).float())
            atoms_per_config.append(x.shape[0])
        energy_values.extend(E)

    fingerprints = torch.vstack(fingerprint_blocks)
    energies = torch.tensor(np.array(energy_values))

    # Label consecutive runs of rows with the index of their configuration.
    config_indices = torch.zeros(len(fingerprints), dtype=torch.int64)
    offset = 0
    for config_id, size in enumerate(atoms_per_config):
        config_indices[offset:offset + size] = config_id
        offset += size

    return fingerprints, energies, config_indices

Now we can test that everything works as we would like

In [None]:
train_fingerprints, train_energies, train_config_indices = get_dataset([6]) # Only configurations with 6 atoms.
# The input size of the network is the fingerprint length (number of columns).
model = NeuralNetworkModel(train_fingerprints[0].shape[-1])
# Forward pass on the untrained model, only to check the tensor shapes.
prediction = model(train_fingerprints, train_config_indices, print_shapes=True)

In [None]:
# Start from a freshly initialised model and train it on the 6-atom data.
# NOTE: `training_loop` is whichever definition was executed last; after
# running the notebook top to bottom that is the mini-batch version.
model = NeuralNetworkModel(vector_dim=train_fingerprints[0].shape[-1])
loss_history = training_loop(model, 
                             train_fingerprints, 
                             train_config_indices, 
                             train_energies, 
                             epochs=10000, 
                             lr=1e-2)

In [None]:
# Training loss per epoch, on a logarithmic y-axis.
fig, ax = plt.subplots(figsize=(3, 3))
ax.plot(loss_history)
ax.set(xlabel='Epoch', ylabel='Loss', yscale='log')

As a test set we will now use configurations with 7 atoms. 

Remember that the model was only trained on configurations with 6 atoms.

In [None]:
test_fingerprints, test_energies, test_config_indices = get_dataset([7]) # Only configurations with 7 atoms.

In [None]:
from imlms.potentials.plot_parity import plot_parity

# Predict total energies for the training and the test configurations.
E_pred_train = model(train_fingerprints, train_config_indices).detach().numpy()
E_pred_test = model(test_fingerprints, test_config_indices).detach().numpy()

# Side-by-side parity plots: reference energy versus predicted energy.
fig, ax = plt.subplots(1, 2, figsize=(6, 3), layout="constrained")
for panel, title, reference, predicted in (
    (ax[0], 'Train', train_energies, E_pred_train),
    (ax[1], 'Test', test_energies, E_pred_test),
):
    plot_parity(panel, reference, predicted)
    panel.set_title(title)

The model performs rather well on the training set, but not so well on the test set. 

### Exercise: Model trained on differently sized configurations.

This loads the data for clusters of sizes between 6 and 8 atoms.

In [None]:
train_fingerprints, train_energies, train_config_indices = get_dataset([6, 7, 8], max_examples=100) # Configurations with 6, 7 and 8 atoms (max_examples=100 is forwarded to the loader for each size).

Train the model using this data

In [None]:

# NOTE: `loss_history` must be produced by training on the new dataset first
# (the exercise above); otherwise this re-plots the history of the earlier run.
fig, ax = plt.subplots(figsize=(3, 3))
ax.plot(loss_history)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_yscale('log')

We can test the model on clusters of several different sizes.

In [None]:
test_fingerprints, test_energies, test_config_indices = get_dataset([6, 7, 8, 9, 10], max_examples=None) # Configurations with 6 to 10 atoms; sizes 9 and 10 are not in the training set.

In [None]:
from imlms.potentials.plot_parity import plot_parity

# Model predictions for the (mixed-size) training set and the larger test set.
E_pred_train = model(train_fingerprints, train_config_indices).detach().numpy()
E_pred_test = model(test_fingerprints, test_config_indices).detach().numpy()

fig, ax = plt.subplots(1, 2, figsize=(6, 3), layout="constrained")

# Left panel: training data; right panel: test data.
panel_data = [(train_energies, E_pred_train), (test_energies, E_pred_test)]
for panel, title, (reference, predicted) in zip(ax, ['Train', 'Test'], panel_data):
    plot_parity(panel, reference, predicted)
    panel.set_title(title)