/
activity_error.py
173 lines (147 loc) · 6.73 KB
/
activity_error.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
"""
Calculate error due to measured activities.
"""
import logging
import numpy as np
import tinydb
from pycalphad import equilibrium, variables as v
from pycalphad.plot.eqplot import _map_coord_to_variable
from pycalphad.core.utils import filter_phases
from scipy.stats import norm
from espei.core_utils import ravel_conditions
def target_chempots_from_activity(component, target_activity, temperatures, reference_result):
"""
Return an array of experimental chemical potentials for the component
Parameters
----------
component : str
Name of the component
target_activity : numpy.ndarray
Array of experimental activities
temperatures : numpy.ndarray
Ravelled array of temperatures (of same size as ``exp_activity``).
reference_result : xarray.Dataset
Dataset of the equilibrium reference state. Should contain a singe point calculation.
Returns
-------
numpy.ndarray
Array of experimental chemical potentials
"""
# acr_i = exp((mu_i - mu_i^{ref})/(RT))
# so mu_i = R*T*ln(acr_i) + mu_i^{ref}
ref_chempot = reference_result["MU"].sel(component=component).values.flatten()
return v.R * temperatures * np.log(target_activity) + ref_chempot
def chempot_error(sample_chempots, target_chempots, std_dev=10.0):
"""
Return the sum of square error from chemical potentials
sample_chempots : numpy.ndarray
Calculated chemical potentials
target_activity : numpy.ndarray
Chemical potentials to target
std_dev : float
Standard deviation of activity measurements in J/mol. Corresponds to the
standard deviation of differences in chemical potential in typical
measurements of activity.
Returns
-------
float
Error due to chemical potentials
"""
# coerce the chemical potentials to float64s, fixes an issue where SymPy NaNs don't work
return norm(loc=0, scale=std_dev).logpdf(np.array(target_chempots - sample_chempots, dtype=np.float64))
def calculate_activity_error(dbf, comps, phases, datasets, parameters=None, phase_models=None, callables=None, data_weight=1.0):
"""
Return the sum of square error from activity data
Parameters
----------
dbf : pycalphad.Database
Database to consider
comps : list
List of active component names
phases : list
List of phases to consider
datasets : espei.utils.PickleableTinyDB
Datasets that contain single phase data
parameters : dict
Dictionary of symbols that will be overridden in pycalphad.equilibrium
phase_models : dict
Phase models to pass to pycalphad calculations
callables : dict
Callables to pass to pycalphad
data_weight : float
Weight for standard deviation of activity measurements, dimensionless.
Corresponds to the standard deviation of differences in chemical
potential in typical measurements of activity, in J/mol.
Returns
-------
float
A single float of the sum of square errors
Notes
-----
General procedure:
1. Get the datasets
2. For each dataset
a. Calculate reference state equilibrium
b. Calculate current chemical potentials
c. Find the target chemical potentials
d. Calculate error due to chemical potentials
"""
std_dev = 500 # J/mol
if parameters is None:
parameters = {}
activity_datasets = datasets.search(
(tinydb.where('output').test(lambda x: 'ACR' in x)) &
(tinydb.where('components').test(lambda x: set(x).issubset(comps))))
error = 0
if len(activity_datasets) == 0:
return error
for ds in activity_datasets:
acr_component = ds['output'].split('_')[1] # the component of interest
# calculate the reference state equilibrium
ref = ds['reference_state']
# data_comps and data_phases ensures that we only do calculations on
# the subsystem of the system defining the data.
data_comps = ds['components']
species = list(map(v.Species, data_comps))
data_phases = filter_phases(dbf, species, candidate_phases=phases)
ref_conditions = {_map_coord_to_variable(coord): val for coord, val in ref['conditions'].items()}
ref_result = equilibrium(dbf, data_comps, ref['phases'], ref_conditions,
model=phase_models, parameters=parameters,
callables=callables)
# calculate current chemical potentials
# get the conditions
conditions = {}
# first make sure the conditions are paired
# only get the compositions, P and T are special cased
conds_list = [(cond, value) for cond, value in ds['conditions'].items() if cond not in ('P', 'T')]
# ravel the conditions
# we will ravel each composition individually, since they all must have the same shape
for comp_name, comp_x in conds_list:
P, T, X = ravel_conditions(ds['values'], ds['conditions']['P'], ds['conditions']['T'], comp_x)
conditions[v.P] = P
conditions[v.T] = T
conditions[_map_coord_to_variable(comp_name)] = X
# do the calculations
# we cannot currently turn broadcasting off, so we have to do equilibrium one by one
# invert the conditions dicts to make a list of condition dicts rather than a condition dict of lists
# assume now that the ravelled conditions all have the same size
conditions_list = [{c: conditions[c][i] for c in conditions.keys()} for i in range(len(conditions[v.T]))]
current_chempots = []
for conds in conditions_list:
sample_eq_res = equilibrium(dbf, data_comps, data_phases, conds,
model=phase_models, parameters=parameters,
callables=callables)
current_chempots.append(sample_eq_res.MU.sel(component=acr_component).values.flatten()[0])
current_chempots = np.array(current_chempots)
# calculate target chempots
samples = np.array(ds['values']).flatten()
target_chempots = target_chempots_from_activity(acr_component, samples, conditions[v.T], ref_result)
# calculate the error
weight = ds.get('weight', 1.0)
pe = chempot_error(current_chempots, target_chempots, std_dev=std_dev/data_weight/weight)
error += np.sum(pe)
logging.debug('Activity error - data: {}, chemical potential difference: {}, probability: {}, reference: {}'.format(samples, current_chempots-target_chempots, pe, ds["reference"]))
# TODO: write a test for this
if np.any(np.isnan(np.array([error], dtype=np.float64))): # must coerce sympy.core.numbers.Float to float64
return -np.inf
return error