In [1]:
from ase.io import read
from quests.descriptor import get_descriptors
from quests.entropy import perfect_entropy, diversity

In [3]:
# Name of the dataset file (without extension) under examples/gap20 to load.
# Exactly one assignment should be active; uncomment an alternative to switch.
data_name = "Graphite"
# data_name = "Diamond"
# data_name = "Graphene"
# data_name = "Nanotubes"
# NOTE(review): absolute, machine-specific path — it will not resolve on another
# machine; consider deriving it from a configurable data directory.
path = f"/home/evere/dev/quests/examples/gap20/{data_name}.xyz"
# index=":" requests every frame in the file, so frames_list is a list of frames.
frames_list = read(path, index=":")

In [4]:
# Descriptors for all loaded frames. k and cutoff are forwarded to quests'
# get_descriptors (presumably neighbour count and cutoff radius — TODO confirm).
x = get_descriptors(frames_list, k=32, cutoff=5.0)

In [5]:
# Settings shared by both estimators below (h is presumably the kernel
# bandwidth — TODO confirm against the quests documentation).
h, batch_size = 0.015, 10000

# Evaluate both quantities on the same descriptors `x`.
H = perfect_entropy(x, h=h, batch_size=batch_size)
D = diversity(x, h=h, batch_size=batch_size)

# One value per line: entropy first, then diversity.
print(H, D, sep="\n")

5.6085074467370095
6.250767982075376


### Compare an unstrained FCC structure with a 1%-strained copy

In [23]:
from ase.build import bulk, make_supercell
import numpy as np

# Lattice constant passed to ase.build.bulk for the reference cell.
a1 = 3.58
# Only referenced by the commented-out make_supercell lines further down.
supercell = 1
# Reference structure: FCC Au built with cubic=True.
fcc1 = bulk('Au', 'fcc', a=a1, cubic=True)

# Strained structure: a copy of the reference whose cell is scaled by 0.99;
# scale_atoms=True rescales the atomic positions together with the cell.
fcc2 = fcc1.copy()
fcc2.set_cell(0.99 * fcc2.cell, scale_atoms=True)

# Optional: repeat both cells before computing descriptors (currently disabled).
# fcc1 = make_supercell(fcc1, np.eye(3) * supercell)
# fcc2 = make_supercell(fcc2, np.eye(3) * supercell)

# Same descriptor settings for both structures so the results are comparable.
x1 = get_descriptors([fcc1], k=32, cutoff=5.0)
x2 = get_descriptors([fcc2], k=32, cutoff=5.0)

# norm() without an axis argument reduces the whole difference array to a single
# scalar, so the value sums over every entry of x1 - x2.
# NOTE(review): if the descriptors hold one row per atom, this grows with the
# number of atoms — confirm that is intended before enabling the supercell lines.
distance = np.linalg.norm(x1 - x2)
print(distance)

0.055155477876466824


### Compute entropy with cross-validation

In [None]:
from sklearn.model_selection import KFold

# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
path = "/home/evere/dev/quests/examples/gap20/Graphite.xyz"
frames_list = read(path, index=":")

# Folds are built over frame indices, not over individual descriptors.
frame_indices = list(range(len(frames_list)))

# Shuffled 3-fold split; the fixed random_state keeps it reproducible.
kf = KFold(n_splits=3, shuffle=True, random_state=0)

# Settings passed to both perfect_entropy and diversity in every fold.
h = 0.015
batch_size = 10000

# enumerate numbers the folds from 0 for the "Fold N" labels printed below.
for i, (train_frame_idx, test_frame_idx) in enumerate(kf.split(frame_indices)):
    print("train")
    print(train_frame_idx)
    print("test")
    print(test_frame_idx)
    # The comprehension variable is `idx` so it cannot be confused with the
    # fold counter `i`.
    atoms_train = [frames_list[idx] for idx in train_frame_idx]
    atoms_test = [frames_list[idx] for idx in test_frame_idx]

    descriptors_train = get_descriptors(atoms_train, k=32, cutoff=5.0)
    descriptors_test = get_descriptors(atoms_test, k=32, cutoff=5.0)

    # Entropy and diversity of the training subset on its own.
    H = perfect_entropy(descriptors_train, h=h, batch_size=batch_size)
    D = diversity(descriptors_train, h=h, batch_size=batch_size)
    print(f"Fold {i} train")
    print(H)
    print(D)

    # Same two quantities for the held-out subset on its own.
    H = perfect_entropy(descriptors_test, h=h, batch_size=batch_size)
    D = diversity(descriptors_test, h=h, batch_size=batch_size)
    print(f"Fold {i} test")
    print(H)
    print(D)

train
[  0   1   2   3   4   5   6   9  10  11  12  13  14  15  17  20  21  23
  25  28  29  30  31  32  34  35  36  38  39  41  42  43  46  47  48  49
  50  52  53  57  58  64  65  67  68  69  70  71  72  73  75  76  77  78
  79  81  82  83  84  87  88  89  91  93  94  96  99 100 102 103 104 105
 106 107 108 113 114 115 116 117 123 124 125 126 127 129 130 132 134 137
 138 139 140 141 142 144 148 149 150 152 153 154 155 156 158 159]
test
[  7   8  16  18  19  22  24  26  27  33  37  40  44  45  51  54  55  56
  59  60  61  62  63  66  74  80  85  86  90  92  95  97  98 101 109 110
 111 112 118 119 120 121 122 128 131 133 135 136 143 145 146 147 151 157]
Fold 0 train
5.428554636835005
6.082262016700815
Fold 0 test
5.42366940317182
5.9775183504244165
train
[  0   1   4   5   7   8   9  16  17  18  19  21  22  24  25  26  27  28
  29  31  32  33  34  36  37  38  39  40  41  42  44  45  47  51  53  54
  55  56  57  58  59  60  61  62  63  65  66  67  70  72  74  77  79  80
  81  82  85  86