In [1]:
import numpy as np
from GMPFeaturizer import GMPFeaturizer, ASEAtomsConverter, PymatgenStructureConverter
import pickle

# Load the example dataset: a pickled list of ASE Atoms objects.
# NOTE: unpickling can execute arbitrary code, so only load pickle files
# that come from a source you trust.
with open("./example.p", "rb") as pickle_file:
    images = pickle.load(pickle_file)

# Choose the converter that matches the structure type of the input.
# ASE Atoms objects are used here; a pre-defined converter for pymatgen
# Structure objects is available as well.
converter = ASEAtomsConverter()
# converter = PymatgenStructureConverter()


### Basic feature computation usage
The list of features is the Cartesian product of the orders and sigmas. The one exception is order -1, which corresponds to just the local electron density: different sigmas make no difference there, so there is only one feature for order -1.


With this setting, the list of features is

[(-1, 0), (0, 0.1), (0, 0.2), (0, 0.3), (1, 0.1), (1, 0.2), (1, 0.3), (2, 0.1), (2, 0.2), (2, 0.3)]


where the first number is the order of the MCSH angular probe, and the second number is the sigma of the Gaussian radial probe.

In [2]:
GMPs = {
    # Orders of the MCSH angular probes and sigmas of the Gaussian radial
    # probes from which the feature list is built.
    "GMPs": {
        "orders": [-1, 0, 1, 2],
        "sigmas": [0.1, 0.2, 0.3],
    },
    # Path to the pseudopotential file.
    "psp_path": "./NC-SR.gpsp",
    # Basically the accuracy of the resulting features.
    "overlap_threshold": 1e-16,
    # Whether the features are squared; no need to change this if you are
    # not computing the feature derivatives.
    # "square": False,
}

In [3]:
# Build a featurizer that computes features only (derivatives disabled),
# then featurize every image with cores=5.
featurizer1 = GMPFeaturizer(
    GMPs=GMPs,
    converter=converter,
    calc_derivatives=False,
)
features1 = featurizer1.prepare_features(images, cores=5)

2023-06-07 13:01:31,688	INFO worker.py:1518 -- Started a local Ray instance.
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 123.74it/s]


In [4]:
# Inspect the result for the first image: the keys of its dict, then the
# shape and the values of its feature array (one item per output line).
print(
    features1[0].keys(),
    features1[0]["features"].shape,
    features1[0]["features"],
    sep="\n",
)

dict_keys(['features'])
(11, 10)
[[2.93183003e-01 9.60240782e-01 1.40524089e+00 1.31511761e+00
  2.86230291e-03 1.26922803e-02 2.94398250e-02 6.66115305e-04
  1.06882066e-02 4.78447126e-02]
 [2.96332206e-01 9.64242416e-01 1.41236217e+00 1.32908197e+00
  2.74331782e-03 1.15191376e-02 2.40127868e-02 6.77798619e-04
  1.11879872e-02 5.27952150e-02]
 [3.00427205e-01 9.67787622e-01 1.41685860e+00 1.34360709e+00
  4.75936794e-05 2.24163133e-04 5.19144968e-04 1.89493491e-05
  3.35811909e-04 1.69736826e-03]
 [2.96332022e-01 9.64242217e-01 1.41236192e+00 1.32908160e+00
  2.74334381e-03 1.15192491e-02 2.40130429e-02 6.77798488e-04
  1.11879799e-02 5.27951510e-02]
 [2.93183305e-01 9.60241095e-01 1.40524122e+00 1.31511795e+00
  2.86231120e-03 1.26923119e-02 2.94398806e-02 6.66116750e-04
  1.06882259e-02 4.78447713e-02]
 [3.16812563e-01 9.85417836e-01 1.43921041e+00 1.37097949e+00
  4.14700294e-03 1.88684883e-02 4.64777708e-02 8.17514868e-04
  1.51320434e-02 7.74136160e-02]
 [6.71012178e-01 2.234380

### Get feature derivative with respect to atom positions
Note that the derivatives are stored as sparse matrices.

In [5]:
# Reuse the GMPs settings, this time with calc_derivatives=True so the
# feature derivatives are computed alongside the features.
featurizer2 = GMPFeaturizer(
    GMPs=GMPs,
    converter=converter,
    calc_derivatives=True,
)
features2 = featurizer2.prepare_features(images, cores=5)

2023-06-07 13:01:50,840	INFO worker.py:1518 -- Started a local Ray instance.
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 115.74it/s]


In [6]:
# Show the keys of the first image's result dict, then the keys of its
# "feature_primes" entry (one item per output line).
print(
    features2[0].keys(),
    features2[0]["feature_primes"].keys(),
    sep="\n",
)

dict_keys(['features', 'feature_primes'])
dict_keys(['size', 'row', 'col', 'val'])


### Manually specify the list of features (not using the Cartesian product)

In [7]:
GMPs_2 = {
    # Explicit (order, sigma) pairs to compute, listed one order per row,
    # instead of the Cartesian product of orders and sigmas.
    "GMPs_detailed_list": [
        (-1, 0),
        (0, 0.1), (0, 0.2), (0, 0.3),
        (1, 0.2), (1, 0.3),
        (2, 0.3),
    ],
    # Path to the pseudopotential file.
    "psp_path": "./NC-SR.gpsp",
    # Basically the accuracy of the resulting features.
    "overlap_threshold": 1e-16,
    # Whether the features are squared; no need to change this if you are
    # not computing the feature derivatives.
    # "square": False,
}

In [8]:
# Featurize with the manually specified feature list (GMPs_2), with
# derivatives enabled.
featurizer3 = GMPFeaturizer(
    GMPs=GMPs_2,
    converter=converter,
    calc_derivatives=True,
)
features3 = featurizer3.prepare_features(images, cores=5)

2023-06-07 13:02:03,074	INFO worker.py:1518 -- Started a local Ray instance.
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 115.59it/s]


### Save calculated features to / load calculated features from a local folder
Simply set `save_features=True` when calling the `prepare_features` function.

The path to the local database is set with the `feature_database` argument when initializing the featurizer.

In [9]:
# Point the featurizer at a local feature database folder, then pass
# save_features=True so the computed features are saved.
featurizer4 = GMPFeaturizer(
    GMPs=GMPs_2,
    converter=converter,
    calc_derivatives=False,
    feature_database="cache/features/",
)
features4 = featurizer4.prepare_features(
    images,
    cores=5,
    save_features=True,
)

2023-02-22 11:12:36,441	INFO worker.py:1518 -- Started a local Ray instance.
100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 145.94it/s]
