-
Notifications
You must be signed in to change notification settings - Fork 3
/
test_pysipfenn.py
463 lines (396 loc) · 24.8 KB
/
test_pysipfenn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
import unittest
import pytest
import os
import numpy as np
import pysipfenn
from importlib import resources
from natsort import natsorted
from numpy import zeros
from pymatgen.core import Structure, Composition
# True when running under GitHub Actions without MODELS_FETCHED set to "true"; used below to skip the tests
# that are marked as depending on the ONNX network files.
IN_GITHUB_ACTIONS = (
    os.getenv("GITHUB_ACTIONS") == "true"
    and os.getenv("MODELS_FETCHED") != "true"
)
class TestCore(unittest.TestCase):
    """Test the core functionality of the Calculator object and other high-level API functions.

    It does not test the correctness of the descriptor generation functions or models, as these are delegated to
    other tests.
    """

    # Location (relative to the pysipfenn package root) of the example structure files shared by several tests.
    _exampleInputs = 'tests/testCaseFiles/exampleInputFiles'

    def setUp(self):
        """Create a fresh Calculator object before every test and make sure it was constructed."""
        self.c = pysipfenn.Calculator()
        self.assertIsNotNone(self.c)

    def _requireCompatibleModels(self, descriptor):
        """Refresh model availability and return the available models compatible with ``descriptor``.

        Args:
            descriptor: Name of the descriptor (e.g. ``'KS2022'``) passed to ``findCompatibleModels``.

        Returns:
            List of model names that are both compatible with the descriptor and currently available.

        Raises:
            ValueError: If no compatible model is available.
        """
        self.c.updateModelAvailability()
        toRun = list(set(self.c.findCompatibleModels(descriptor)).intersection(set(self.c.network_list_available)))
        if not toRun:
            raise ValueError(f'Did not detect any {descriptor} models to run')
        return toRun

    def _loadExampleStructures(self, limit=None, ordered=False):
        """Load structures from the example input directory shipped with the package.

        Args:
            limit: Maximum number of files to load; ``None`` loads all of them.
            ordered: If True, file names are naturally sorted before ``limit`` is applied; otherwise the
                ``os.listdir`` order is used.

        Returns:
            List of ``Structure`` objects, one per loaded file.
        """
        # ``as_file`` is the supported way to obtain a filesystem path from a package resource; using the
        # Traversable itself as a context manager relies on pathlib.Path's context-manager support, which was
        # deprecated and is no longer available on recent Python versions.
        with resources.as_file(resources.files('pysipfenn').joinpath(self._exampleInputs)) as exampleInputsDir:
            exampleInputFiles = os.listdir(exampleInputsDir)
            if ordered:
                exampleInputFiles = natsorted(exampleInputFiles)
            return [Structure.from_file(f'{exampleInputsDir}/{eif}') for eif in exampleInputFiles[:limit]]

    def _assertMatchesReferenceFile(self, fileName):
        """Assert that ``fileName`` in the working directory equals the same-named packaged reference file.

        Both files are opened with ``newline=''`` so that line endings take part in the comparison.
        """
        referenceFile = resources.files('pysipfenn').joinpath(f'tests/testCaseFiles/{fileName}')
        with open(fileName, 'r', newline='') as f1:
            with referenceFile.open('r', newline='') as f2:
                writtenLines = f1.readlines()
                referenceLines = f2.readlines()
        # zip() stops at the shorter input, so an empty or truncated output would otherwise pass unnoticed.
        self.assertEqual(len(writtenLines), len(referenceLines),
                         f'{fileName} has a different number of lines than its reference.')
        for line1, line2 in zip(writtenLines, referenceLines):
            self.assertEqual(line1, line2)

    def testInit(self):
        """Test that the Calculator object is initialized correctly."""
        self.assertEqual(self.c.predictions, [])
        self.assertEqual(self.c.toRun, [])
        self.assertEqual(self.c.descriptorData, [])
        self.assertEqual(self.c.inputFiles, [])

    def testDestroy(self):
        """Test that the Calculator can deallocate itself (incl. loaded models and its data)."""
        self.assertIsNotNone(self.c)
        self.c.toRun = ['model1', 'model2']
        self.c.descriptorData = [zeros([271])]*10000
        self.c.destroy()

    def testDetectModels(self):
        """Test that the updateModelAvailability() method works without errors and leaves a list of available
        models on the Calculator.
        """
        self.c.updateModelAvailability()
        self.assertIsInstance(self.c.network_list_available, list)

    # Legacy name. Without the ``test`` prefix the method was never collected by the test runners; the alias is
    # kept only so that explicit invocations by the old name keep working.
    detectModels = testDetectModels

    @pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test depends on the ONNX network files")
    def testDownloadAndLoadModels(self):
        """Tests that the downloadModels() method works without errors in a case where the models are not already
        downloaded and loads them correctly using the loadModels() method. Then also load a model explicitly using
        loadModels() and check that it is in the loadedModels dictionary. Also check that error is raised correctly
        if a non-available model is requested to be loaded.
        """
        self.c.downloadModels(network='all')
        self.c.loadModels(network='SIPFENN_Krajewski2020_NN24')
        self.assertEqual(set(self.c.network_list_available), set(self.c.loadedModels.keys()))
        self.assertIn('SIPFENN_Krajewski2020_NN24', self.c.loadedModels)

        with self.assertRaises(ValueError):
            self.c.loadModels(network='jx9348ghfmx8345wgyf')

    @pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test depends on the ONNX network files")
    def testFromPOSCAR_Ward2017(self):
        """Update the list of available models and identify which models are compatible with the Ward2017
        descriptor. Then run the Calculator on the exampleInputFiles directory.
        """
        self._requireCompatibleModels('Ward2017')
        with resources.as_file(resources.files('pysipfenn').joinpath(self._exampleInputs)) as testFileDir:
            print(testFileDir)
            self.c.runFromDirectory(testFileDir, 'Ward2017')

    @pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test depends on the ONNX network files")
    def testFromPOSCAR_KS2022(self):
        """Update the list of available models and identify which models are compatible with the KS2022
        descriptor. Then run the Calculator on the exampleInputFiles directory. It also tests the printout of the
        Calculator object after the prediction run.
        """
        self._requireCompatibleModels('KS2022')
        with resources.as_file(resources.files('pysipfenn').joinpath(self._exampleInputs)) as testFileDir:
            print(testFileDir)
            self.c.runFromDirectory(testFileDir, 'KS2022')

        with self.subTest(msg='Test Calculator printout after predictions'):
            printOut = str(self.c)
            self.assertIn('pySIPFENN Calculator Object', printOut)
            self.assertIn('Models are located in', printOut)
            self.assertIn('Loaded Networks', printOut)
            self.assertIn('Last files selected as input', printOut)
            self.assertIn('Last Prediction Run Using', printOut)
            self.assertIn('Last prediction run on', printOut)

    @pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test depends on the ONNX network files")
    def testFromStructure_KS2022_dilute(self):
        """Update the list of available models and identify which models are compatible with the KS2022_dilute
        featurization (KS2022 descriptor). Then run the dilute models on a single Al-substituted Fe supercell, once
        with the 'pure' convenience base structure and once with the explicitly given base structure, and check
        that both give the same predictions.
        """
        self._requireCompatibleModels('KS2022')

        matStr = (
            '{"@module": "pymatgen.core.structure", "@class": "Structure", "charge": 0, "lattice": {'
            '"matrix": [[2.318956, 0.000185, -0.819712], [-1.159251, 2.008215, -0.819524], [2.5e-05, '
            '0.000273, 2.459206]], "pbc": [true, true, true], "a": 2.4595700289085083, '
            '"b": 2.4593515311565364, "c": 2.4592060152801354, "alpha": 109.45958252256221, '
            '"beta": 109.46706290007663, "gamma": 109.46912204302215, "volume": 11.453776235839058}, '
            '"sites": [{"species": [{"element": "Fe", "occu": 1}], "abc": [0.0, 0.0, 0.0], "xyz": [0.0, 0.0, '
            '0.0], "label": "Fe", "properties": {"magmom": 2.211}}], "@version": null}'
        )
        struct = Structure.from_str(matStr, fmt='json')
        struct.make_supercell([2, 2, 2])
        # Keep the unsubstituted supercell as the explicit base structure before introducing the Al site.
        baseStruct = struct.copy()
        struct.replace(0, 'Al')

        preds1 = self.c.runModels_dilute(descriptor='KS2022',
                                         structList=[struct],
                                         baseStruct='pure',
                                         mode='serial')
        preds2 = self.c.runModels_dilute(descriptor='KS2022',
                                         structList=[struct],
                                         baseStruct=[baseStruct],
                                         mode='serial')
        for val1, val2 in zip(preds1[0], preds2[0]):
            self.assertEqual(val1, val2)

    @pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test depends on the ONNX network files")
    def testFromPrototypes_KS2022_randomSolution(self):
        """Quick runtime test of the top level API for random solution structures. It does not test the accuracy, as
        that is delegated elsewhere."""
        self._requireCompatibleModels('KS2022')
        # Only successful execution is checked here, so the returned predictions are not kept.
        self.c.runModels_randomSolutions(
            descriptor='KS2022',
            baseStructList='FCC',
            compList='AuCu',
            compositionConvergenceCriterion=0.05,
            featureConvergenceCriterion=0.02,
            minimumSitesPerExpansion=8,
            mode='serial')

    def test_descriptorCalculate_Ward2017_serial(self):
        """Test successful execution of calculate_Ward2017() in series on up to 6 example structures. A separate
        test for calculation accuracy is done in test_Ward2017.py.
        """
        testStructures = self._loadExampleStructures(limit=6)
        descList = self.c.calculate_Ward2017(structList=testStructures, mode='serial')
        self.assertEqual(len(descList), len(testStructures))

    def test_descriptorCalculate_Ward2017_parallel(self):
        """Test successful execution of calculate_Ward2017() in parallel on up to 6 example structures. A separate
        test for calculation accuracy is done in test_Ward2017.py.
        """
        testStructures = self._loadExampleStructures(limit=6)
        descList = self.c.calculate_Ward2017(structList=testStructures, mode='parallel', max_workers=2)
        self.assertEqual(len(descList), len(testStructures))

    def test_descriptorCalculate_KS2022_serial(self):
        """Test successful execution of calculate_KS2022() in series on all example structures. A separate test for
        calculation accuracy is done in test_KS2022.py.
        """
        testStructures = self._loadExampleStructures()
        descList = self.c.calculate_KS2022(structList=testStructures, mode='serial')
        self.assertEqual(len(descList), len(testStructures))

    def test_descriptorCalculate_KS2022_parallel(self):
        """Test successful execution of calculate_KS2022() in parallel on all example structures. A separate test
        for calculation accuracy is done in test_KS2022.py.
        """
        testStructures = self._loadExampleStructures()
        descList = self.c.calculate_KS2022(structList=testStructures, mode='parallel', max_workers=4)
        self.assertEqual(len(descList), len(testStructures))

    def test_descriptorCalculate_KS2022_dilute_serial(self):
        """Test successful execution of calculate_KS2022_dilute() in series based on the FCC prototype loaded from
        the default prototype library, with one site replaced by Fe. A separate test for calculation accuracy is
        done in test_KS2022.py"""
        diluteStruct = self.c.prototypeLibrary['FCC']['structure'].copy()
        diluteStruct.make_supercell([2, 2, 2])
        diluteStruct.replace(0, 'Fe')
        # The same structure is submitted twice, so both descriptors must come out identical.
        testStructures = [diluteStruct.copy()]*2
        descList = self.c.calculate_KS2022_dilute(structList=testStructures, mode='serial')
        self.assertEqual(len(descList), len(testStructures), "Not all structures were processed.")
        for desc in descList:
            self.assertListEqual(
                desc.tolist(),
                descList[0].tolist(),
                "All descriptors should be equal for the same structure are the same."
            )

    def test_descriptorCalculate_KS2022_dilute_parallel(self):
        """Test successful execution of calculate_KS2022_dilute() in parallel based on the FCC prototype loaded from
        the default prototype library, substituting Fe on each of the 8 supercell sites in turn. A separate test for
        calculation accuracy is done in test_KS2022.py"""
        with self.subTest(msg="Constructing dilute structures"):
            diluteStruct = self.c.prototypeLibrary['FCC']['structure'].copy()
            diluteStruct.make_supercell([2, 2, 2])
            testStructures = []
            for i in range(8):
                tempStruct = diluteStruct.copy()
                tempStruct.replace(i, 'Fe')
                testStructures.append(tempStruct)

        with self.subTest(msg="Running parallel calculation with default 'pure' base structure"):
            descList = self.c.calculate_KS2022_dilute(structList=testStructures, mode='parallel', max_workers=4)
            self.assertEqual(len(descList), len(testStructures), "Not all structures were processed.")

        with self.subTest(msg="All descriptors should be equal for the same structure as sites are equivalent"):
            # temp0 is also the reference for the run with explicitly defined base structures below.
            temp0 = descList[0].tolist()
            for desc in descList:
                temp1 = desc.tolist()
                for t0, t1 in zip(temp0, temp1):
                    self.assertAlmostEqual(t0, t1, places=6)

        with self.subTest(msg="Running parallel calculation with defined base structures"):
            baseStructs = [diluteStruct.copy()]*8
            descList = self.c.calculate_KS2022_dilute(
                structList=testStructures,
                baseStruct=baseStructs,
                mode='parallel',
                max_workers=4)
            self.assertEqual(len(descList), len(testStructures), "Not all structures were processed.")

        with self.subTest(msg="All descriptors should be equal for the same structure as sites are equivalent"):
            for desc in descList:
                temp1 = desc.tolist()
                for t0, t1 in zip(temp0, temp1):
                    self.assertAlmostEqual(t0, t1, places=6)

    def test_RunModels_Errors(self):
        """Test that the runModels(), runModels_dilute() and runModels_randomSolutions() methods raise
        AssertionError when called with no models to run or with a descriptor name that is not handled.
        """
        with self.subTest(msg='No models to run'):
            self.c.network_list_available = []
            with self.assertRaises(AssertionError):
                self.c.runModels(descriptor='KS2022', structList=[])

        with self.subTest(msg='No models to run dilute'):
            self.c.network_list_available = []
            with self.assertRaises(AssertionError):
                self.c.runModels_dilute(descriptor='KS2022', structList=[])

        with self.subTest(msg='No models to run random solid solution'):
            self.c.network_list_available = []
            with self.assertRaises(AssertionError):
                self.c.runModels_randomSolutions(descriptor='KS2022', baseStructList=[], compList=[])

        with self.subTest(msg='Descriptor not implemented'):
            with self.assertRaises(AssertionError):
                self.c.runModels(descriptor='jx9348ghfmx8345wgyf', structList=[])

        with self.subTest(msg='Dilute descriptor not implemented'):
            with self.assertRaises(AssertionError):
                self.c.runModels_dilute(descriptor='jx9348ghfmx8345wgyf', structList=[])

        with self.subTest(msg='Random solution descriptor not implemented'):
            with self.assertRaises(AssertionError):
                self.c.runModels_randomSolutions(descriptor='jx9348ghfmx8345wgyf', baseStructList=[], compList=[])

    def test_WriteDescriptorDataToCSV(self):
        """Test that the writeDescriptorsToCSV() method writes the correct data to a CSV file and that the file is
        consistent with the reference output. It does that first without any input file names set and then with
        labels taken from the c.inputFiles list.
        """
        testStructures = self._loadExampleStructures(limit=4, ordered=True)
        self.c.calculate_KS2022(structList=testStructures, mode='serial')

        self.c.writeDescriptorsToCSV(descriptor='KS2022',
                                     file='TestFile_DescriptorData_4_KS2022_labeled_enumerated.csv')
        self._assertMatchesReferenceFile('TestFile_DescriptorData_4_KS2022_labeled_enumerated.csv')

        self.c.inputFiles = ['myStructure1.POSCAR', 'myStructure2.POSCAR', 'myStructure3.POSCAR', 'myStructure4.POSCAR']
        self.c.writeDescriptorsToCSV(descriptor='KS2022',
                                     file='TestFile_DescriptorData_4_KS2022_labeled_named.csv')
        self._assertMatchesReferenceFile('TestFile_DescriptorData_4_KS2022_labeled_named.csv')

    def test_WriteDescriptorDataToNPY(self):
        """Test that the writeDescriptorsToNPY() method writes an NPY file whose content, when loaded back, equals
        the descriptor data held by the Calculator. It does that first without any input file names set and then
        with the c.inputFiles list populated.
        """
        testStructures = self._loadExampleStructures(limit=4, ordered=True)
        self.c.calculate_KS2022(structList=testStructures, mode='serial')

        self.c.writeDescriptorsToNPY(descriptor='KS2022',
                                     file='TestFile_DescriptorData_4_KS2022_labeled_enumerated.npy')
        loaded_data = np.load('TestFile_DescriptorData_4_KS2022_labeled_enumerated.npy')
        np.testing.assert_array_equal(loaded_data, self.c.descriptorData)

        self.c.inputFiles = ['myStructure1.POSCAR', 'myStructure2.POSCAR', 'myStructure3.POSCAR', 'myStructure4.POSCAR']
        for file in ['TestFile_DescriptorData_4_KS2022_labeled_named.npy', 'descriptorData.npy']:
            self.c.writeDescriptorsToNPY(descriptor='KS2022', file=file)
            if file == 'descriptorData.npy':
                # NOTE(review): presumably the writer prefixes its default file name with the descriptor
                # name - confirm against writeDescriptorsToNPY().
                loaded_data = np.load('KS2022_' + file)
            else:
                loaded_data = np.load(file)
            np.testing.assert_array_equal(loaded_data, self.c.descriptorData)

    def test_CalculatorPrint(self):
        """Test that the Calculator.__str__() method returns the correctly formatted string after being initialized
        but before predictions.
        """
        printOut = str(self.c)
        self.assertIn('pySIPFENN Calculator Object', printOut)
        self.assertIn('Models are located', printOut)
        self.assertIn('Loaded Networks', printOut)

    def test_util_Ward2017toKS2022(self):
        """Tests that Ward2017 conversion to its KS2022 subset works as intended."""
        struct = self.c.prototypeLibrary['FCC']['structure']
        self.assertIsInstance(struct, Structure)
        desc1 = self.c.calculate_Ward2017([struct])[0]
        desc2 = list(self.c.calculate_KS2022([struct])[0])
        desc2from1 = list(pysipfenn.ward2ks2022(desc1))
        for d2, d21 in zip(desc2, desc2from1):
            self.assertAlmostEqual(d2, d21, places=6, msg="Direct and converted KS2022toWard2017 should be the same.")
class TestCoreRSS(unittest.TestCase):
    """Test the high-level API functionality of the Calculator object in regard to random solution structures (RSS).

    It does not test the accuracy, just all runtime modes and known physicality of the results (e.g., FCC should
    have coordination number of `12`).

    Note:
        The execution of the calculate_KS2022_randomSolutions() method is done under coarse settings (for speed
        reasons) and should not be used for any accuracy tests. A separate testing for calculation accuracy
        against consistency and reference values is done in `test_KS2022_randomSolutions.py`.
    """

    def setUp(self):
        """Create a fresh Calculator object before every test and make sure it was constructed."""
        self.c = pysipfenn.Calculator()
        self.assertIsNotNone(self.c)

    def test_descriptorCalculate_KS2022_randomSolution_serial_pair(self):
        """Test successful execution of a composition-structure pair in series"""
        with self.subTest(msg="Running single composition-structure pair"):
            d1 = self.c.calculate_KS2022_randomSolutions(
                'BCC',
                'FeNi',
                minimumSitesPerExpansion=16,
                featureConvergenceCriterion=0.02,
                compositionConvergenceCriterion=0.05,
                mode='serial')
            self.assertEqual(len(d1), 1, "Only one composition-structure pair should be processed.")
            self.assertEqual(len(d1[0]), 256, "All 256 KS2022 features should be obtained.")

    def test_descriptorCalculate_KS2022_randomSolution_serial_multiple(self):
        """Test successful execution (in series) of multiple compositions occupying the same FCC lattice."""
        with self.subTest(msg="Running multiple compositions occupying the same FCC lattice"):
            d2 = self.c.calculate_KS2022_randomSolutions(
                'FCC',
                ['FeNi', 'CrNi'],
                minimumSitesPerExpansion=16,
                featureConvergenceCriterion=0.02,
                compositionConvergenceCriterion=0.05,
                mode='serial')
            self.assertEqual(len(d2), 2, "Two composition-structure pairs should be processed.")
            self.assertEqual(len(d2[0]), 256, "All 256 KS2022 features should be obtained.")
            self.assertEqual(len(d2[1]), 256, "All 256 KS2022 features should be obtained.")
            self.assertAlmostEqual(
                float(d2[0][0]),
                float(d2[1][0]),
                places=6, msg="Coordination number (KS2022[0]) should be the same (12) for both compositions.")
            self.assertNotAlmostEqual(
                float(d2[0][13]),
                float(d2[1][13]),
                places=6, msg="mean_NeighDiff_shell1_Number (KS2022[13]) should be different (1.0vs2.0)."
            )

    def test_descriptorCalculate_KS2022_randomSolution_parallel_pair(self):
        """Test successful execution of a composition-structure pair in parallel mode. Just for the input passing
        validation."""
        with self.subTest(msg="Running single composition-structure pair"):
            d1 = self.c.calculate_KS2022_randomSolutions(
                'BCC',
                'FeNi',
                mode='parallel',
                max_workers=1)
            self.assertEqual(len(d1), 1, "Only one composition-structure pair should be processed.")
            self.assertEqual(len(d1[0]), 256, "All 256 KS2022 features should be obtained.")

    def test_descriptorCalculate_KS2022_randomSolution_parallel_multiple(self):
        """Test successful execution of many composition-structure pairs given in ordered lists of input."""
        # Prototypes and compositions are passed both by name and as objects to exercise both input forms.
        myBCC = self.c.prototypeLibrary['BCC']['structure']
        with self.subTest(msg="Running multiple compositions occupying multiple prototypes"):
            d2 = self.c.calculate_KS2022_randomSolutions(
                ['FCC', myBCC, 'BCC', 'HCP'],
                ['WMo', Composition('WMo'), 'FeNi', 'CrNi'],
                mode='parallel',
                max_workers=4)
            self.assertEqual(len(d2), 4, "Four composition-structure pairs should be processed.")
            for desc in d2:
                self.assertEqual(len(desc), 256, "All 256 KS2022 features should be obtained.")

            self.assertNotAlmostEqual(
                float(d2[0][0]),
                float(d2[1][0]),
                places=6, msg="Coordination number (KS2022[0]) should be different for BCC and FCC.")
            self.assertAlmostEqual(
                float(d2[1][0]),
                float(d2[2][0]),
                places=6, msg="Coordination number (KS2022[0]) should be the same for both BCCs.")

        with self.subTest(msg='Verify that the metadata was correctly recorded.'):
            # Use the TestCase assertion rather than a bare ``assert`` so the check is not stripped under -O.
            self.assertEqual(len(self.c.metas['RSS']), 4, "There should be 4 metadata records.")
            for meta in self.c.metas['RSS']:
                self.assertIn('diffHistory', meta)
                self.assertIn('propHistory', meta)
                self.assertIn('finalAtomsN', meta)
                self.assertIn('finalCompositionDistance', meta)
                self.assertIn('finalComposition', meta)