In [6]:
import os

import numpy as np
from ase import Atoms
from ase.io import read, write
from pyace import create_multispecies_basis_config
from pyace.activelearning import compute_B_projections
from quests.entropy import perfect_entropy, diversity
from sklearn.model_selection import KFold

In [7]:
# Settings dictionary handed to create_multispecies_basis_config() in the next cell.
# The "ALL" keys are kept exactly as in the original configuration.
# NOTE(review): entries tagged "author unsure" carried a "?" marker in the original
# notebook — confirm those values before relying on the results.
basis_config = dict(
    deltaSplineBins=0.001,
    elements=["C"],
    embeddings=dict(
        ALL={
            "npot": "FinnisSinclairShiftedScaled",  # author unsure
            "fs_parameters": [1, 1],
            "ndensity": 1,  # author unsure
        },
    ),
    bonds=dict(
        ALL={
            "radbase": "SBessel",
            "radparameters": [5.25],
            "rcut": 6,  # author unsure
            "dcut": 0.01,  # author unsure
        },
    ),
    functions=dict(
        # Disabled option kept for reference:
        # "number_of_functions_per_element": 1000,
        ALL={
            "nradmax_by_orders": [8, 8, 6],
            "lmax_by_orders": [0, 6, 4],
        },
    ),
)

In [8]:
# Build the basis configuration object from the `basis_config` settings dictionary.
basis = create_multispecies_basis_config(basis_config)
# Bare trailing expression so the notebook displays the object's repr.
basis

BBasisConfiguration(deltaSplineBins=0.001, funcspecs_blocks=['C', ])

### Compute entropy

In [9]:
# Dataset to analyse; the file is read from <gap20_dir>/<data_name>.xyz.
# Other dataset names previously toggled here: "Diamond", "Graphene", "Nanotubes".
data_name = "Graphite"

# The data directory can be overridden through the QUESTS_GAP20_DIR environment
# variable. When it is unset (or empty) the original hard-coded location is used,
# so the resulting path is unchanged by default.
gap20_dir = os.environ.get("QUESTS_GAP20_DIR") or "/home/evere/dev/quests/examples/gap20"
path = os.path.join(gap20_dir, f"{data_name}.xyz")

# index=":" makes ase.io.read return every frame in the file instead of only one.
frames_list = read(path, index=":")

# Descriptors for all loaded frames; the bare expression below displays the
# returned dict of arrays.
descriptor_ace_dict = compute_B_projections(basis, frames_list)
descriptor_ace_dict

{0: array([[ 5.30699625,  4.87772161,  4.63383655, ...,  0.06639799,
          0.03726733, -0.00743433],
        [ 5.30169047,  4.8585322 ,  4.57873826, ...,  4.82295864,
          0.13828475, -5.12933646],
        [ 5.30288263,  4.85515545,  4.57593924, ...,  4.78332148,
          0.13571487, -5.0994087 ],
        ...,
        [ 4.20097867,  1.80477498,  1.07852852, ..., -0.08517027,
         -0.71969204,  1.91552557],
        [ 4.20097867,  1.80477498,  1.07852852, ..., -0.08517027,
         -0.71969204,  1.91552557],
        [ 4.20097867,  1.80477498,  1.07852852, ..., -0.08517027,
         -0.71969204,  1.91552557]])}

In [10]:
# Key 0 is the only entry of the dict displayed by the previous cell.
descriptor_ace = descriptor_ace_dict[0]

# Show the descriptor array shape and the number of frames together. Only the
# last expression of a cell is displayed, so a bare `descriptor_ace.shape` on
# its own line above `len(frames_list)` would be silently discarded.
descriptor_ace.shape, len(frames_list)

160

In [None]:
# Entropy and diversity of the full descriptor set.
# `perfect_entropy` and `diversity` are imported in the notebook's import cell.

# Bandwidth passed to both estimators.
# Values tried previously in this cell: 0.015, 24.734, 25.0.
h = 95.45151481768947
batch_size = 10000

H = perfect_entropy(descriptor_ace, h=h, batch_size=batch_size)
D = diversity(descriptor_ace, h=h, batch_size=batch_size)

# First line of output is the entropy, second line is the diversity.
print(H)
print(D)

2.0558518468455733
2.231900011578892


### Compute entropy with cross-validation

In [None]:
# Reload the frames so this section can be run on its own.
data_name = "Graphite"

# The data directory can be overridden through the QUESTS_GAP20_DIR environment
# variable. When it is unset (or empty) the original hard-coded location is used,
# so the resulting path is unchanged by default.
gap20_dir = os.environ.get("QUESTS_GAP20_DIR") or "/home/evere/dev/quests/examples/gap20"
path = os.path.join(gap20_dir, f"{data_name}.xyz")

# index=":" makes ase.io.read return every frame in the file instead of only one.
frames_list = read(path, index=":")

# One integer per frame; these are what the cross-validation splitter partitions.
frame_indices = list(range(len(frames_list)))

In [None]:
# Cross-validated entropy/diversity: for every fold, descriptors are recomputed
# for the train and the test frames and both statistics are printed for each split.
# `KFold`, `perfect_entropy` and `diversity` are imported in the notebook's import cell.

# 30 folds over the frame indices; shuffling is seeded so the folds are reproducible.
kf = KFold(n_splits=30, shuffle=True, random_state=0)

# NOTE(review): the "Compute entropy" section above uses h = 95.45151481768947 while
# this section uses h = 25.0 — confirm the difference is intended before comparing
# the numbers from the two sections.
h = 25.0
batch_size = 10000

# enumerate() supplies the fold number; the comprehensions use `idx` so the fold
# counter is never shadowed by a loop variable of the same name.
for fold, (train_frame_idx, test_frame_idx) in enumerate(kf.split(frame_indices)):
    print("train")
    print(train_frame_idx)
    print("test")
    print(test_frame_idx)
    atoms_train = [frames_list[idx] for idx in train_frame_idx]
    atoms_test = [frames_list[idx] for idx in test_frame_idx]

    # Key 0 selects the same entry of the returned dict that the earlier cells use.
    descriptors_train = compute_B_projections(basis, atoms_train)[0]
    descriptors_test = compute_B_projections(basis, atoms_test)[0]

    # Same report for both splits, train first, then test.
    for split_name, descriptors in (("train", descriptors_train), ("test", descriptors_test)):
        H = perfect_entropy(descriptors, h=h, batch_size=batch_size)
        D = diversity(descriptors, h=h, batch_size=batch_size)
        print(f"Fold {fold} {split_name}")
        print(H)
        print(D)