In [1]:
import numpy as np

from ase import Atoms
from ase.io import read, write

from pyace import create_multispecies_basis_config
from pyace.activelearning import compute_B_projections

from quests.entropy import perfect_entropy, diversity

In [2]:
# basis_config = {
#   "deltaSplineBins": 0.001,
#   "elements": ['C'],

#   "embeddings": {
#     "ALL": {
#       "npot": 'FinnisSinclairShiftedScaled', # ?
#       "fs_parameters": [ 1, 1],
#       "ndensity": 1, # ?
#     },
#   },

#   "bonds": {
#     "ALL": {
#       "radbase": "SBessel",
#       "radparameters": [ 5.25 ],
#       "rcut": 6, # ?
#       "dcut": 0.01, # ?
#     }
#   },

#   "functions": {
#     # "number_of_functions_per_element": 1000,
#     "ALL": {
#         "nradmax_by_orders": [ 8,8,6],
#         "lmax_by_orders"   : [ 0,6,4] }
#   }
# }

# Settings dict later handed to create_multispecies_basis_config for a
# single-element ('C') basis. The "# ?" markers are the original author's
# annotations (presumably values still to be confirmed).
basis_config = {
    "deltaSplineBins": 5e-5,
    "elements": ["C"],
    "embeddings": {
        "ALL": {
            "npot": "FinnisSinclairShiftedScaled",  # ?
            "fs_parameters": [3.0, 0.8],
            "ndensity": 1,  # ?
        },
    },
    "bonds": {
        "ALL": {
            "radbase": "SBessel",
            "radparameters": [5.5],
            "rcut": 5.5,  # ?
            "dcut": 0.2,  # ?
        },
    },
    "functions": {
        # "number_of_functions_per_element": 1000,
        "ALL": {
            "nradmax_by_orders": [8, 4, 2],
            "lmax_by_orders": [8, 6, 2],
        },
    },
}

In [3]:
# Build the pyace basis configuration object from the settings dict; the
# trailing bare name echoes its repr (a BBasisConfiguration) as the cell output.
basis = create_multispecies_basis_config(basis_config)
basis

BBasisConfiguration(deltaSplineBins=5e-05, funcspecs_blocks=['C', ])

### Compute entropy

In [4]:
# Dataset to analyse; uncomment one of the alternatives below to switch.
data_name = "Graphite"
# data_name = "Diamond"
# data_name = "Graphene"
# data_name = "Nanotubes"
# NOTE(review): absolute, machine-specific path — adjust to your own checkout.
path = f"/home/grethel/dev/quests/examples/gap20/{data_name}.xyz"
# index=":" requests all frames from the file (presumably returned as a list
# of ase.Atoms — confirm against the ASE docs).
frames_list = read(path, index=":")
# The result is a dict; in the recorded output it has a single key, 0, whose
# value is a 2-D float array.
descriptor_ace_dict = compute_B_projections(basis, frames_list)
descriptor_ace_dict

{0: array([[ 4.51443924e+00,  3.96547269e+00,  3.72082915e+00, ...,
          3.31270086e+00,  5.98266870e-06, -6.10248242e-01],
        [ 4.50669618e+00,  3.93120648e+00,  3.58106132e+00, ...,
          3.47885877e+00,  4.15414990e-05, -6.64967431e-01],
        [ 4.50841578e+00,  3.92903604e+00,  3.57994266e+00, ...,
          3.50195294e+00, -1.43376728e-05, -6.72060372e-01],
        ...,
        [ 3.89648104e+00,  1.78886636e+00,  5.82483220e-01, ...,
          3.05233206e+00, -4.30436340e-04,  1.78151426e+00],
        [ 3.89648104e+00,  1.78886636e+00,  5.82483220e-01, ...,
          3.05233206e+00, -4.30436340e-04,  1.78151426e+00],
        [ 3.89648104e+00,  1.78886636e+00,  5.82483220e-01, ...,
          3.05233206e+00, -4.30436340e-04,  1.78151426e+00]])}

In [5]:
# Key 0 is the only entry in the projections dict shown above.
descriptor_ace = descriptor_ace_dict[0]
# A notebook cell only echoes its *last* expression, so a bare
# `descriptor_ace.shape` in the middle of the cell is silently discarded.
# Print it explicitly so both quantities are visible.
print(descriptor_ace.shape)
len(frames_list)

160

In [None]:
# h = 0.015
# h = 24.734
# h = 10
# h = 24.743599279318804
# h = 47.725757408845844
# h = 0.3472375667701281
# h = 0.069486
# `h` and `batch_size` are forwarded unchanged to both estimators below.
h, batch_size = 3.34138, 10000
H = perfect_entropy(descriptor_ace, h=h, batch_size=batch_size)
D = diversity(descriptor_ace, h=h, batch_size=batch_size)
# One value per line: the perfect_entropy result first, then diversity.
print(H, D, sep="\n")

In [None]:
# data_names = ["Graphene",
#               "Diamond",
#               "Graphite",
#               "Nanotubes",
#               "Fullerenes",
#               "Defects",
#               "Surfaces",
#               "Liquid",
#               "Amorphous_Bulk",
#              ]
data_names = [
    "Defects",
    "Surfaces",
    "Liquid",
    "Amorphous_Bulk",
]

# `h` and `batch_size` are identical for every dataset, so they are set once
# instead of being re-assigned on each iteration.
h = 3.34138
batch_size = 10000

# The loop uses its own names (`name`, `xyz_path`, `frames`, `descriptors`,
# `entropy`) so that the sweep does not overwrite `data_name`, `path`,
# `frames_list`, `descriptor_ace` or `H` from the single-dataset cells above;
# re-running those cells afterwards would otherwise silently operate on the
# last dataset of this list.
for name in data_names:
    xyz_path = f"/home/grethel/dev/quests/examples/gap20/{name}.xyz"
    frames = read(xyz_path, index=":")
    # Key 0 of the projections dict holds the descriptor array.
    descriptors = compute_B_projections(basis, frames)[0]
    entropy = perfect_entropy(descriptors, h=h, batch_size=batch_size)
    print(f"{name} entropy: {entropy}")

### Compute entropy with cross-validation

In [None]:
data_name = "Graphite"
# Use the same dataset directory as the earlier cells; this cell previously
# pointed at a different home directory (/home/evere/...), which does not match
# the location the executed cells read from.
# NOTE(review): absolute, machine-specific path — adjust to your own checkout.
path = f"/home/grethel/dev/quests/examples/gap20/{data_name}.xyz"
frames_list = read(path, index=":")
# Integer positions into frames_list; these are what the K-fold splitter
# partitions into train/test subsets.
frame_indices = list(range(len(frames_list)))

In [None]:
# KFold is only needed by this section. perfect_entropy and diversity are
# already imported in the first cell, so they are not re-imported here.
from sklearn.model_selection import KFold

# Shuffled 30-fold split with a fixed seed so the folds are reproducible.
kf = KFold(n_splits=30, shuffle=True, random_state=0)

h = 25.0
batch_size = 10000

# enumerate() supplies the fold number, replacing a manual counter that shared
# its name (`i`) with the index variable of the list comprehensions below.
for fold, (train_frame_idx, test_frame_idx) in enumerate(kf.split(frame_indices)):
    print("train")
    print(train_frame_idx)
    print("test")
    print(test_frame_idx)
    atoms_train = [frames_list[idx] for idx in train_frame_idx]
    atoms_test = [frames_list[idx] for idx in test_frame_idx]

    # Key 0 of the projections dict holds the descriptor array.
    descriptors_train = compute_B_projections(basis, atoms_train)[0]
    descriptors_test = compute_B_projections(basis, atoms_test)[0]

    # Report the same two quantities for the train subset, then the test
    # subset, in the same order and format as before.
    for split_name, descriptors in (
        ("train", descriptors_train),
        ("test", descriptors_test),
    ):
        H = perfect_entropy(descriptors, h=h, batch_size=batch_size)
        D = diversity(descriptors, h=h, batch_size=batch_size)
        print(f"Fold {fold} {split_name}")
        print(H)
        print(D)