-
Notifications
You must be signed in to change notification settings - Fork 3
/
test_ModelAdjusters.py
194 lines (164 loc) · 8.33 KB
/
test_ModelAdjusters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import unittest
import pytest
import os
import pysipfenn
import torch
from importlib import resources
# Skip flag for the network-dependent tests: True only when running inside
# GitHub Actions (GITHUB_ACTIONS == "true") while the models have not been
# fetched yet (MODELS_FETCHED != "true"). Despite its name, the flag is False
# in GitHub Actions once the models are available.
IN_GITHUB_ACTIONS = (
    os.environ.get("GITHUB_ACTIONS") == "true"
    and os.environ.get("MODELS_FETCHED") != "true"
)
@pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test depends on the ONNX network files")
class TestModelAdjusters(unittest.TestCase):
    """
    Test all model adjusting features that can operate on the Calculator object. Note that this will require
    the models to be downloaded and the environment variable MODELS_FETCHED to be set to true if running in GitHub
    Actions.

    The setup will load the Krajewski2022_NN30 model and create an ``OPTIMADEAdjuster`` object for testing that is by
    default connected to the Materials Project ``OPTIMADE`` server and looks for their GGA+U formation energies. In the
    ``testFullRoutine`` test, the adjuster will be used to adjust the model to the Hf-Mo metallic system. The test will
    cover almost all adjuster functionalities in different ways to hit all anticipated code paths. It also tests the
    ``LocalAdjuster`` class for loading data from CSV and NPY files, which is a parent class of the ``OPTIMADEAdjuster``.
    """

    # Name of the model loaded in ``setUp`` and adjusted throughout the tests.
    MODEL_NAME = "SIPFENN_Krajewski2022_NN30"

    # OPTIMADE filter used by ``testFullRoutine`` to select Hf-Mo entries while excluding O, C, F, Cl, and S.
    HF_MO_QUERY = 'elements HAS "Hf" AND elements HAS "Mo" AND NOT elements HAS ANY "O","C","F","Cl","S"'

    def setUp(self):
        """
        Initialises the Calculator and ModelAdjuster objects for testing.
        """
        self.c = pysipfenn.Calculator(autoLoad=False)
        self.assertIsNotNone(self.c)
        self.c.loadModels(self.MODEL_NAME)
        self.assertIn(self.MODEL_NAME, self.c.loadedModels)
        self.ma = pysipfenn.OPTIMADEAdjuster(self.c, self.MODEL_NAME)

    def testInit(self):
        """
        Test that the OPTIMADEAdjuster object has been initialized correctly.
        """
        self.assertEqual(self.ma.modelName, self.MODEL_NAME)
        self.assertIsInstance(self.ma.model, torch.nn.Module)
        self.assertIsInstance(self.ma.calculator, pysipfenn.Calculator)
        # A freshly created adjuster must not hold any fetched data yet.
        self.assertEqual(len(self.ma.comps), 0)
        self.assertEqual(len(self.ma.names), 0)
        self.assertEqual(len(self.ma.validationLabels), 0)

    def testPlotExceptions(self):
        """
        Test that the plotting methods raise an ``AssertionError`` when no data is present.
        """
        with self.assertRaises(AssertionError):
            self.ma.plotStarting()
        with self.assertRaises(AssertionError):
            self.ma.plotAdjusted()

    def testFullRoutine(self):
        """
        Test the full routine of the adjuster based on the default values pointing to Materials Project. Get the data
        using OPTIMADE to adjust the model to Hf-Mo metallic system. Matrix search is reduced to 4 cases to speed up
        the test and it is designed to explore all code paths in the search. The test will also check the highlighting
        and plotting functionalities of the adjuster.
        """
        self.ma.fetchAndFeturize(self.HF_MO_QUERY, parallelWorkers=4)

        # Export the descriptors in both formats (written relative to the current working directory).
        self.ma.calculator.writeDescriptorsToCSV("KS2022", "AdjusterTestDescriptors.csv")
        self.ma.calculator.writeDescriptorsToNPY("KS2022", "AdjusterTestDescriptors.npy")

        # Check highlighting and no-last-validation plotting
        self.ma.highlightPoints([32, 23, 21, 22])
        self.ma.plotStarting()

        # Hyperparameter search. The 1e-8 is on purpose, so that the model does not converge and always improves after
        # the first epoch.
        self.ma.matrixHyperParameterSearch(
            learningRates=[1e-8, 1e-3],
            optimizers=["Adam"],
            weightDecays=[1e-4, 1e-5],
            epochs=10
        )

        self.ma.highlightPoints([0, 1, 2, 3])
        self.ma.highlightCompositions(["Hf", "Mo", "HfMo", "Hf50 Mo50", "Hf3Mo"])
        self.ma.plotStarting()
        self.ma.plotAdjusted()

        # Induce duplicates to test if they are handled
        self.ma.fetchAndFeturize(self.HF_MO_QUERY, parallelWorkers=4)

        self.ma.adjust(
            validation=0,
            learningRate=1e-4,
            epochs=10,
            optimizer="Adamax",
            weightDecay=1e-4,
            lossFunction="MSE"
        )

        # Plot once more with the names list emptied.
        self.ma.names = []
        self.ma.plotStarting()
        self.ma.plotAdjusted()

    def testDataLoading(self):
        """
        Test the data loading functionality of the ``LocalAdjuster`` class (note, ``OPTIMADEAdjuster`` extends it). It
        will test loading from both CSV and NPY files exported from the Calculator object. Note that CSV files have
        names in the first column and headers in the first row, while NPY files are just the data arrays. It tests
        implicit loading from the ``Calculator`` object as well. Lastly, it tests the error raising for unsupported
        descriptors and data not matching the descriptor dimensions selected (an optional feature).
        """
        # ``as_file`` is the documented way to obtain a real filesystem path for a packaged resource. Entering the
        # Traversable/Path returned by ``files()`` directly with ``with`` is no longer possible on Python 3.13+.
        testCaseFiles = resources.files('pysipfenn').joinpath('tests/testCaseFiles/')
        with resources.as_file(testCaseFiles) as testFileDir:
            # From CSV
            self.lma1 = pysipfenn.LocalAdjuster(
                self.c,
                model=self.MODEL_NAME,
                descriptorData=str(testFileDir.joinpath("AdjusterTestDescriptors.csv")),
                targetData=str(testFileDir.joinpath("AdjusterTestTargets.csv")),
                descriptor="KS2022"
            )
            self.assertIsNotNone(self.lma1)
            self.assertGreater(len(self.lma1.descriptorData), 0)
            self.assertGreater(len(self.lma1.targetData), 0)
            del self.lma1

            # From NPY
            self.lma2 = pysipfenn.LocalAdjuster(
                self.c,
                model=self.MODEL_NAME,
                descriptorData=str(testFileDir.joinpath("AdjusterTestDescriptors.npy")),
                targetData=str(testFileDir.joinpath("AdjusterTestTargets.npy")),
                descriptor="KS2022"
            )
            self.assertIsNotNone(self.lma2)
            self.assertGreater(len(self.lma2.descriptorData), 0)
            self.assertGreater(len(self.lma2.targetData), 0)

            # Hand the loaded descriptors to the Calculator so the next adjuster can be built without an explicit
            # ``descriptorData`` argument.
            self.c.descriptorData = self.lma2.descriptorData
            del self.lma2

            # Implicit, from the Calculator (the check here is that construction does not raise)
            self.lma3 = pysipfenn.LocalAdjuster(
                self.c,
                targetData=str(testFileDir.joinpath("AdjusterTestTargets.csv")),
                model=self.MODEL_NAME,
                descriptor="KS2022",
            )

            # Error raising
            with self.assertRaises(AssertionError):
                self.lma4 = pysipfenn.LocalAdjuster(
                    self.c,
                    targetData=str(testFileDir.joinpath("AdjusterTestTargets.csv")),
                    model=self.MODEL_NAME,
                    descriptor="Ward2017",
                )

            with self.assertRaises(NotImplementedError):
                self.lma5 = pysipfenn.LocalAdjuster(
                    self.c,
                    targetData=str(testFileDir.joinpath("AdjusterTestTargets.csv")),
                    model=self.MODEL_NAME,
                    descriptor="SomeCrazyDescriptor",
                )

    def testEndpointOverride(self):
        """
        Test the endpoint override functionality of the ``OPTIMADEAdjuster`` class. It will test the override of the
        endpoint and the data fetching from the new endpoint.
        """
        endpoint = ["https://alexandria.icams.rub.de/pbesol"]
        targetPath = ['attributes', '_alexandria_formation_energy_per_atom']

        self.ma2 = pysipfenn.OPTIMADEAdjuster(
            self.c,
            model=self.MODEL_NAME,
            endpointOverride=endpoint,
            targetPath=targetPath)

        self.ma2.fetchAndFeturize(
            'elements HAS "Hf" AND elements HAS "Mo" AND elements HAS "Zr"',
            parallelWorkers=2)

        # Strictly greater than zero: ``len(...) >= 0`` holds for any collection, so it could never detect an
        # empty fetch, which is exactly what the failure messages below describe.
        self.assertGreater(len(self.ma2.comps), 0, "No compositions were found, thus no data was fetched.")
        self.assertGreater(len(self.ma2.names), 0, "No names were found, thus no data was fetched.")
        self.assertGreater(
            len(self.ma2.descriptorData), 0,
            "No descriptor data was found. If the other asserts passed, this is likely a bug in the featurization "
            "or structural data has been made incompatible or otherwise corrupted.")