In [80]:
#%pip install ase==3.22.1
#%pip install numpy==1.24.4
#%pip install dscribe==2.5.0
#%pip install scipy

# Compare between each substances Euclid Distance and Cosine Similarity
# Brownmillerite perovskite structure (POSCAR INPUT) 
Structure sponsured by Takatsuka san

In [81]:
from ase.io import read
from ase import Atoms

atoms = read("BM_POSCAR")
atoms_CHGNet = read("CHGNet_POSCAR")
atoms_mattersim = read("MatterSim_POSCAR")

In [82]:
#Parameters setting
r_cut=5
n_max=2
l_max=2

In [83]:
species = list(set(atoms.get_chemical_symbols()))
print(species)

['Co', 'O', 'Sr']


In [84]:
import numpy as np
from dscribe.descriptors import SOAP
from ase import Atoms



species = list(set(atoms.get_chemical_symbols()))

# Create SOAP descriptor with averaging mode
soap = SOAP(
    species=species,
    periodic=True,
    r_cut=r_cut,
    n_max=n_max,
    l_max=l_max,
    average="inner"  # Proper average mode
)

'''
vec_bm = soap.create(atoms)
vec_bm_mattersim = soap.create(atoms_mattersim)
vec_CHGNet = soap.create(atoms_CHGNet)
'''

'\nvec_bm = soap.create(atoms)\nvec_bm_mattersim = soap.create(atoms_mattersim)\nvec_CHGNet = soap.create(atoms_CHGNet)\n'

# Create average vector for each substance

In [85]:
#local descriptor into a global descriptor is simply by taking the average over all sites
average_soap = SOAP(
    species=species,
    r_cut=r_cut,
    n_max=n_max,
    l_max=l_max,
    average="inner",
    sparse=False
)
average_bm = average_soap.create(atoms)
print("Average SOAP Brownmillerite",average_bm.shape)

average_bm_mattersim = average_soap.create(atoms_mattersim)
print("Average SOAP Brownmillerite by mattersim",average_bm_mattersim.shape)


average_bm_CHGNet = average_soap.create(atoms_CHGNet)
print("Average SOAP Brownmillerite by CHGNET",average_bm_CHGNet.shape)

Average SOAP Brownmillerite (63,)
Average SOAP Brownmillerite by mattersim (63,)
Average SOAP Brownmillerite by CHGNET (63,)


# Euclid Distance calculation

In [86]:
from scipy.spatial.distance import pdist, squareform
import numpy as np

molecules_bm_vs_mattersim = np.vstack([average_bm, average_bm_mattersim])
distance_bm_vs_mattersim = squareform(pdist(molecules_bm_vs_mattersim))
print("Distance matrix between brownmillerite and mattersim created: ")
print(distance_bm_vs_mattersim)
#Euclid Distance calculate
print("Euclid distance between brownmillerite and mattersim created: ",np.linalg.norm(distance_bm_vs_mattersim))
print("")

molecules_bm_vs_CHGNet = np.vstack([average_bm, average_bm_CHGNet])
distance_bm_vs_CHGNet = squareform(pdist(molecules_bm_vs_CHGNet))
print("Distance matrix between brownmillerite and CHGNet: ")
print(distance_bm_vs_CHGNet)
#Euclid Distance calculate
print("Euclid distance between brownmillerite and CHGNet: ",np.linalg.norm(distance_bm_vs_CHGNet))
print("")

molecules_CHGNet_vs_mattersim = np.vstack([average_bm_CHGNet,average_bm_mattersim])
distance_CHGNet_vs_mattersim = squareform(pdist(molecules_CHGNet_vs_mattersim))
print("Distance matrix between CHGNet and mattersim: ")
print(distance_CHGNet_vs_mattersim)
#Euclid Distance calculate
print("Euclid distance between CHGNet and mattersim: ",np.linalg.norm(distance_CHGNet_vs_mattersim))
print("")


Distance matrix between brownmillerite and mattersim created: 
[[ 0.         44.10008712]
 [44.10008712  0.        ]]
Euclid distance between brownmillerite and mattersim created:  62.36694131157275

Distance matrix between brownmillerite and CHGNet: 
[[ 0.        45.5252145]
 [45.5252145  0.       ]]
Euclid distance between brownmillerite and CHGNet:  64.38237577520665

Distance matrix between CHGNet and mattersim: 
[[0.         8.95452902]
 [8.95452902 0.        ]]
Euclid distance between CHGNet and mattersim:  12.663616383967895



# Cosine Similarity Calculation

The similarity can take values between -1 and +1. Smaller angles between vectors produce larger cosine values, indicating greater cosine similarity. 

cos(angle)= dot product between vectors /(L2 norm of vector1)* (L2 norm of vector2)

For example:
- When two vectors have the same orientation, the angle between them is 0, and the cosine similarity is 1.
- Perpendicular vectors have a 90-degree angle between them and a cosine similarity of 0.
- Opposite vectors have an angle of 180 degrees between them and a cosine similarity of -1.

Ref: https://www.learndatasci.com/glossary/cosine-similarity/

In [87]:
import math

def cos_sim(p, q):
    return np.dot(p, q) / (np.linalg.norm(p) * np.linalg.norm(q))

cos_sim_bm_vs_mattersim =cos_sim(average_bm, average_bm_mattersim)
print("cos_sim_bm_vs_mattersim: ",cos_sim_bm_vs_mattersim)

cos_sim_bm_vs_CHGNet =cos_sim(average_bm, average_bm_CHGNet)
print("cosine similarity bm_vs_CHGNet: ",cos_sim_bm_vs_CHGNet)


cos_sim_mattersim_vs_CHGNet =cos_sim(average_bm_mattersim, average_bm_CHGNet)
print("cosine similarity mattersim_vs_CHGNet: ",cos_sim_mattersim_vs_CHGNet)

cos_sim_bm_vs_mattersim:  0.99773912340735
cosine similarity bm_vs_CHGNet:  0.999191050239698
cosine similarity mattersim_vs_CHGNet:  0.9986371915898893
