In [None]:
%%capture install
try:
  import imlms
  print('Already installed')
except:
  %pip install git+https://github.com/Mads-PeterVC/imlms

In [None]:
# Show only the last line of the output captured from the install cell
# (for example the 'Already installed' message when the import succeeded).
print(install.stdout.splitlines()[-1])

# Descriptors

In [None]:
import torch
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
import numpy as np
from imlms.potentials.load_carbon_data import get_invariances_examples
from agox.utils.plot import plot_atoms, plot_cell

### Example: Atomic Descriptors

In [None]:
def atomic_fingerprint(positions, cutoff=5.0, n=16, sigma=0.5):
    """Compute a radial fingerprint for every atom in a structure.

    Each atom's pairwise distances to all atoms are smeared with Gaussians
    centred on `n` evenly spaced radii between 0 and `cutoff`, and the
    contributions are summed per atom.

    Parameters
    ----------
    positions : array-like, shape (n_atoms, n_dims)
        Coordinates passed to `scipy.spatial.distance.cdist`.
    cutoff : float
        Largest Gaussian centre; also sets the period of the cosine weight.
    n : int
        Number of Gaussian centres, i.e. the length of each fingerprint.
    sigma : float
        Width parameter of the Gaussians, used as exp(-(R - r0)**2 / sigma**2).

    Returns
    -------
    numpy.ndarray, shape (n_atoms, n)
        One fingerprint vector per atom.

    Notes
    -----
    The distance matrix includes each atom's zero distance to itself, so that
    term contributes to the sum. Centres that are not strictly below `cutoff`
    are masked out; since `np.linspace` includes the endpoint, the centre
    located exactly at `cutoff` always yields a zero entry.
    """
    centers = np.linspace(0, cutoff, n)

    # Pairwise distances with a trailing axis so they broadcast against centres.
    distances = cdist(positions, positions)[:, :, np.newaxis]
    gaussians = np.exp(-(distances - centers[np.newaxis, np.newaxis, :])**2 / sigma**2)

    # Per-centre weight: cosine decay towards the cutoff, zero at or beyond it.
    radial_weight = np.cos(np.pi/2 * centers / cutoff) * (centers < cutoff)

    # Sum over the neighbour axis -> one vector per atom.
    return (gaussians * radial_weight).sum(axis=1)

In [None]:
# Load the example structures; the returned pair is unpacked as `chain` and `ring`.
chain, ring = get_invariances_examples()

In [None]:
# Draw the two example structures side by side, writing each atom's index on
# top of it so individual atoms can be referred to in the text below.
labels = ['Chain', 'Ring']
fig, axes = plt.subplots(1, 2, figsize=(6, 3))

# Text styling shared by all atom-index annotations.
index_text_style = dict(fontsize=8, color='black', ha='center', va='center')

for ax, atoms, label in zip(axes, [chain, ring], labels):
    # The cell is drawn fully transparent (alpha 0).
    plot_cell(ax, atoms.cell, collection_kwargs={'alpha': 0})
    plot_atoms(ax, atoms)
    for atom in atoms:
        ax.text(atom.x, atom.y, atom.index, **index_text_style)
    ax.set_title(label)

Previously we have used descriptors of configurations, but we can also make 
descriptors of atoms - or, as they are typically called, descriptors of the atomic 
environment.

The `atomic_fingerprint` function above computes such atomic environment descriptors.

The cell below computes these fingerprints for the two example structures.

In [None]:
# One fingerprint row per atom, using the default fingerprint settings.
chain_fingerprints = atomic_fingerprint(chain.positions)
ring_fingerprints = atomic_fingerprint(ring.positions)

# Report the shape of each fingerprint array.
for name, fingerprints in (("Ring", ring_fingerprints), ("Chain", chain_fingerprints)):
    print(name, fingerprints.shape)

From the shapes we see that for each example structure we have 6 vectors, one per atom, each with 16 entries. 

In [None]:
def dot_product_similarity(v1, v2):
    """Return the normalised dot product (cosine similarity) of two vectors.

    The dot product of `v1` and `v2` is divided by the product of their
    Euclidean norms, so vectors pointing in the same direction give 1 and
    orthogonal vectors give 0, independent of their lengths.
    """
    overlap = np.dot(v1, v2)
    scale = np.linalg.norm(v1) * np.linalg.norm(v2)
    return overlap / scale

Like before, we can use the dot product similarity measure to compare the different atomic descriptors.
From the figure above we can tell that:

- For the chain, atoms 4 and 5 are at either end of the chain. These are very similar environments, so we would expect the fingerprints of these two atoms to be very similar. 

- Atoms 1 and 2 of the chain are both in the center, so we expect them to be very similar.

- Atom 1 of the chain is in the middle which is quite different from atom 4 at the end, so the similarity between their descriptors should be lower.

- All atoms of the ring structure are similar, because of its symmetry, so the similarity between environments should be high. 

In [None]:
# Similarities between selected pairs of chain atoms:
#   4-5: the two atoms at the ends of the chain
#   1-2: the two atoms in the centre of the chain
#   1-4: a centre atom compared with an end atom
chain_45 = dot_product_similarity(chain_fingerprints[4], chain_fingerprints[5])
chain_12 = dot_product_similarity(chain_fingerprints[1], chain_fingerprints[2])
chain_14 = dot_product_similarity(chain_fingerprints[1], chain_fingerprints[4])

print(f"Chain 4-5 {chain_45:0.4f}")
print(f"Chain 1-2 {chain_12:0.4f}")
print(f"Chain 1-4 {chain_14:0.4f}")

As before we can make a similarity matrix to get a visual representation of the similarity. 

In [None]:
def similarity_matrix(fingerprints_a, fingerprints_b):
    """Return the matrix of pairwise dot-product similarities.

    Entry [i, j] is `dot_product_similarity(fingerprints_a[i], fingerprints_b[j])`,
    so rows follow `fingerprints_a` and columns follow `fingerprints_b`. The
    matrix size is taken from the inputs rather than hard-coded, so the two
    sets may hold different numbers of atoms.
    """
    D = np.zeros((len(fingerprints_a), len(fingerprints_b)))
    for i, fp_a in enumerate(fingerprints_a):
        for j, fp_b in enumerate(fingerprints_b):
            D[i, j] = dot_product_similarity(fp_a, fp_b)
    return D


# Atom-vs-atom similarities within each structure and across the two structures.
D_ring = similarity_matrix(ring_fingerprints, ring_fingerprints)
D_chain = similarity_matrix(chain_fingerprints, chain_fingerprints)
D_ring_vs_chain = similarity_matrix(ring_fingerprints, chain_fingerprints)  # rows: ring, columns: chain

In [None]:
# Show the three similarity matrices side by side. All panels share the same
# colour limits (0.9 to 1.0) so their colours can be compared directly.
fig, ax = plt.subplots(1, 3, figsize=(12, 4))

# `c` keeps the image handle of the first panel; it is not used further here.
c = ax[0].imshow(D_ring, cmap='viridis', vmin=0.9, vmax=1.0)
ax[0].set_title('Ring')

ax[1].imshow(D_chain, cmap='viridis', vmin=0.9, vmax=1.0)
ax[1].set_title('Chain')

ax[2].imshow(D_ring_vs_chain, cmap='viridis', vmin=0.9, vmax=1.0)
ax[2].set_title('Ring vs Chain')

# Label both single-structure panels. The slice must be 0:2, because 0:1
# contains only the first axes and would leave the 'Chain' panel unlabelled.
for a in ax[0:2]:
    a.set_xlabel('Atom index')
    a.set_ylabel('Atom index')

# In the mixed panel rows are ring atoms (y axis) and columns are chain atoms (x axis).
ax[2].set_xlabel('Chain Atom index')
ax[2].set_ylabel('Ring Atom index')