Merge pull request #7 from ConorMacBride/model-bugs
Model docstring fixes and additional parameter checks
ConorMacBride committed Jan 5, 2021
2 parents 88d4c59 + 740bec5 commit 4adc463
Showing 3 changed files with 87 additions and 19 deletions.
37 changes: 26 additions & 11 deletions src/mcalf/models/ibis.py
@@ -30,28 +30,29 @@ class IBIS8542Model(ModelBase):
----------
original_wavelengths : array_like
One-dimensional array of wavelengths that correspond to the uncorrected spectral data.
stationary_line_core : float, optional, default = 8542.104320687517
stationary_line_core : float, optional, default = 8542.099145376844
Wavelength of the stationary line core.
absorption_guess : array_like, length=4, optional, default = [-1000, stationary_line_core, 0.2, 0.1]
Initial guess to take when fitting the absorption Voigt profile.
emission_guess : array_like, length=4, optional, default = [1000, stationary_line_core, 0.2, 0.1]
Initial guess to take when fitting the emission Voigt profile.
absorption_min_bound : array_like, length=4, optional, default = [-np.inf, stationary_line_core-0.15, 1e-6, 1e-6]
Minimum bounds for all the absorption Voigt profile parameters in order of the function's arguments.
emission_min_bound : array_like, length=4, optional, default = [0, stationary_line_core-0.15, 1e-6, 1e-6]
emission_min_bound : array_like, length=4, optional, default = [0, -np.inf, 1e-6, 1e-6]
Minimum bounds for all the emission Voigt profile parameters in order of the function's arguments.
absorption_max_bound : array_like, length=4, optional, default = [0, stationary_line_core+0.15, 1, 1]
Maximum bounds for all the absorption Voigt profile parameters in order of the function's arguments.
emission_max_bound : array_like, length=4, optional, default = [np.inf, stationary_line_core+0.15, 1, 1]
emission_max_bound : array_like, length=4, optional, default = [np.inf, np.inf, 1, 1]
Maximum bounds for all the emission Voigt profile parameters in order of the function's arguments.
absorption_x_scale : array_like, length=4, optional, default = [1500, 0.2, 0.3, 0.5]
Characteristic scale for all the absorption Voigt profile parameters in order of the function's arguments.
emission_x_scale : array_like, length=4, optional, default = [1500, 0.2, 0.3, 0.5]
Characteristic scale for all the emission Voigt profile parameters in order of the function's arguments.
neural_network : sklearn.neural_network.MLPClassifier, optional, default = see description
The MLPClassifier object that will be used to classify the spectra. Its default value is
`MLPClassifier(solver='lbfgs', alpha=1e-3, hidden_layer_sizes=(10, 4), random_state=1)`.
constant_wavelengths : array_like, length same as `original_wavelengths`, optional, default = see description
The MLPClassifier object (or similar) that will be used to classify the spectra. Defaults to a `GridSearchCV`
with `MLPClassifier(solver='lbfgs', hidden_layer_sizes=(40,), max_iter=1000)`
for best `alpha` selected from `[1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 6e-5, 7e-5, 8e-5, 9e-5]`.
constant_wavelengths : array_like, ndim=1, optional, default = see description
The desired set of wavelengths that the spectral data should be rescaled to represent. It is assumed
that these have constant spacing, but that may not be a requirement if you specify your own array.
The default value is an array from the minimum to the maximum wavelength of `original_wavelengths` in
@@ -67,7 +68,7 @@ class IBIS8542Model(ModelBase):
it.
prefilter_response : array_like, length=n_wavelengths, optional, default = see note
Each constant wavelength scaled spectrum will be corrected by dividing it by this array. If `prefilter_response`
is not give, and `prefilter_ref_main` and `prefilter_ref_wvscl` are not given, `prefilter_response` will have a
is not given, and `prefilter_ref_main` and `prefilter_ref_wvscl` are not given, `prefilter_response` will have a
default value of `None`.
prefilter_ref_main : array_like, optional, default = None
If `prefilter_response` is not specified, this will be used along with `prefilter_ref_wvscl` to generate the
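For context, the default classifier described in the updated docstring could be constructed roughly as follows. This is an illustrative sketch, not code from this commit; the cross-validation setting is an assumption.

from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

# Base network matching the documented default
mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=(40,), max_iter=1000)

# Grid of alpha values listed in the docstring
parameter_space = {'alpha': [1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 6e-5, 7e-5, 8e-5, 9e-5]}

# GridSearchCV selects the best alpha; cv=5 is assumed, not stated in the docstring
default_neural_network = GridSearchCV(mlp, parameter_space, cv=5)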
@@ -114,7 +115,7 @@ class IBIS8542Model(ModelBase):
The MLPClassifier object (or similar) that will be used to classify the spectra. Defaults to a `GridSearchCV`
with `MLPClassifier(solver='lbfgs', hidden_layer_sizes=(40,), max_iter=1000)`
for best `alpha` selected from `[1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 6e-5, 7e-5, 8e-5, 9e-5]`.
constant_wavelengths : array_like, length same as `original_wavelengths`, optional, default = see description
constant_wavelengths : array_like, ndim=1, optional, default = see description
The desired set of wavelengths that the spectral data should be rescaled to represent. It is assumed
that these have constant spacing, but that may not be a requirement if you specify your own array.
The default value is an array from the minimum to the maximum wavelength of `original_wavelengths` in
@@ -126,7 +127,7 @@ class IBIS8542Model(ModelBase):
See `utils.generate_sigma()` for more information.
prefilter_response : array_like, length=n_wavelengths, optional, default = see note
Each constant wavelength scaled spectrum will be corrected by dividing it by this array. If `prefilter_response`
is not give, and `prefilter_ref_main` and `prefilter_ref_wvscl` are not given, `prefilter_response` will have a
is not given, and `prefilter_ref_main` and `prefilter_ref_wvscl` are not given, `prefilter_response` will have a
default value of `None`.
output : str, optional, default = None
If the program wants to output data, it will place it relative to the location specified by this parameter.
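As a usage illustration, a model could be created with the parameters documented above. This is a minimal sketch, assuming `IBIS8542Model` is imported from `mcalf.models`; the wavelength grid is hypothetical and the keyword values simply repeat the documented defaults.

import numpy as np
from mcalf.models import IBIS8542Model  # assumed import path

# Hypothetical wavelength grid covering the Ca II 8542 line
wavelengths = np.linspace(8541.3, 8542.7, 25)

model = IBIS8542Model(
    original_wavelengths=wavelengths,
    stationary_line_core=8542.099145376844,                 # documented default
    absorption_guess=[-1000, 8542.099145376844, 0.2, 0.1],  # documented default
    emission_guess=[1000, 8542.099145376844, 0.2, 0.1],     # documented default
)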
@@ -539,9 +540,10 @@ def fit(self, time=None, row=None, column=None, spectrum=None, profile=None, sig
The profile to fit. (Will infer profile from `classifications` if omitted.)
sigma : int or array_like, optional, default = None
Explicit sigma index or profile. See `_get_sigma` for details.
classifications : int, optional, default = None
classifications : int or array_like, optional, default = None
Classifications to determine the fitted profile to use (if profile not explicitly given). Will use
neural network to classify them if not.
neural network to classify them if not. If a multidimensional array, must have the same shape as
[`time`, `row`, `column`]. Dimensions that would have length of 1 can be excluded.
background : float, optional, default = None
If provided, this value will be subtracted from the explicit spectrum provided in `spectrum`. Will
not be applied to spectra found from the indices, use the `load_background` method instead.
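The shape rule added to the `classifications` description can be illustrated as follows (illustrative only; assumes spectral data has already been loaded into `model`, as in the tests later in this commit).

import numpy as np

# One classification per spectrum, matching the [time, row, column] layout
classifications = np.zeros((2, 3, 4), dtype=int)
results = model.fit(time=range(2), row=range(3), column=range(4),
                    classifications=classifications)

# Dimensions of length 1 may be excluded, e.g. a (2, 4) array when fitting a single row
results = model.fit(time=range(2), row=range(1), column=range(4),
                    classifications=classifications[:, 0])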
@@ -582,9 +584,22 @@ def fit(self, time=None, row=None, column=None, spectrum=None, profile=None, sig
time, row, column = make_iter(*self._get_time_row_column(time=time, row=row, column=column))
indices = np.transpose(np.array(np.meshgrid(time, row, column, indexing='ij')), axes=[1, 2, 3, 0])

# Ensure that length of arrays match
spectra_indices_shape_mismatch = spectra.shape[:-1] != indices.shape[:-1]
spectra_class_size_mismatch = np.size(spectra[..., 0]) != np.size(classifications)
spectra_class_shape_mismatch = False # Only test this if appropriate
if isinstance(classifications, np.ndarray) and classifications.ndim != 1:
# If a multidimensional array of classifications are given, make sure it matches the indices layout
# Allow for dimensions of length 1 to be excluded
if np.squeeze(spectra[..., 0]).shape != np.squeeze(classifications).shape:
spectra_class_shape_mismatch = True
if spectra_indices_shape_mismatch or spectra_class_size_mismatch or spectra_class_shape_mismatch:
raise ValueError("number classifications do not match number of spectra and associated indices")

# Make shape (n_spectra, n_features) so can process in a list
spectra = spectra.reshape(-1, spectra.shape[-1])
indices = indices.reshape(-1, indices.shape[-1])
classifications = np.asarray(classifications) # Make sure ndarray methods are available
classifications = classifications.reshape(-1)

# Remove spectra that are invalid (this allows for masking of the loaded data to constrain a region to fit)
4 changes: 3 additions & 1 deletion src/mcalf/models/results.py
@@ -15,7 +15,7 @@ class FitResult:
fitted_parameters : ndarray
The parameters fitted.
fit_info : dict
Additional information on the fit including at least 'classification', 'profile', 'success' and 'index'.
Additional information on the fit including at least 'classification', 'profile', 'success', 'chi2' and 'index'.
Attributes
----------
@@ -27,6 +27,8 @@
Profile of the fitted spectrum.
success : bool
Whether the fit was completed successfully.
chi2 : float
Chi-squared value for the fit.
index : list
Index ([<time>, <row>, <column>]) of the spectrum in the spectral array.
__dict__
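For reference, the documented attributes (including the newly listed `chi2`) can be read from a returned result roughly like this. A sketch only, assuming `model` has data loaded and that `fit` returns a list of `FitResult` objects, as the tests below indicate.

result = model.fit(time=0, row=0, column=0)[0]

print(result.profile)     # profile used for the fit
print(result.success)     # whether the fit completed successfully
print(result.chi2)        # chi-squared value, now documented
print(result.index)       # [<time>, <row>, <column>] of the spectrum
print(result.parameters)  # the fitted parameters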
65 changes: 58 additions & 7 deletions tests/models/test_ibis.py
@@ -266,6 +266,63 @@ def ibis8542model_results(ibis8542model_spectra):
return result, m, classifications


def assert_results_equal(res1, res2):
for i in range(len(res1)):
assert np.array_equal(res1[i].parameters, res2[i].parameters)
assert res1[i].classification == res2[i].classification
assert res1[i].profile == res2[i].profile
assert res1[i].success == res2[i].success
assert np.array_equal(res1[i].index, res2[i].index)


def test_ibis8542model_wrong_length_of_classifications(ibis8542model_spectra):

# Load model with random spectra loaded
m, classifications = ibis8542model_spectra

assert classifications.shape == (2, 3, 4) # This is what the test needs to work (verifies fixture)

# Test with too few classifications
c1 = classifications[:, :, [0, 1]]
assert c1.shape == (2, 3, 2)
c2 = classifications[:, [0, 1]]
assert c2.shape == (2, 2, 4)
c3 = classifications[[1]]
assert c3.shape == (1, 3, 4)
c_wrong_shape = np.transpose(classifications) # ...but correct size so would not fail otherwise
assert c_wrong_shape.shape == (4, 3, 2)
for c in [c1, c2, c3, c_wrong_shape]:
with pytest.raises(ValueError) as e:
result = m.fit(time=range(2), row=range(3), column=range(4), classifications=c)
assert 'classifications do not match' in str(e.value)

# Test with too many classifications
for t, r, c in [
# (range(2), range(3), range(4)), # Correct values
(1, range(3), range(4)),
(range(2), range(1, 3), range(4)),
(range(2), range(3), range(2, 4)),
(range(2), range(3), 3),
(0, 1, 2),
]:
with pytest.raises(ValueError) as e:
result = m.fit(time=t, row=r, column=c, classifications=classifications)
assert 'classifications do not match' in str(e.value)

# Test with dimensions of length 1 removed (res1 and res2 should be equivalent)

c = classifications[:, np.newaxis, 0]
assert c.shape == (2, 1, 4)
res1 = m.fit(time=range(2), row=range(1), column=range(4), classifications=c)

c = classifications[:, 0]
assert c.shape == (2, 4)
res2 = m.fit(time=range(2), row=range(1), column=range(4), classifications=c)

assert len(res1) == len(res2) == 2 * 1 * 4
assert_results_equal(res1, res2)


@pytest.fixture()
def ibis8542model_resultsobjs(ibis8542model_results):

@@ -298,13 +355,7 @@ def test_ibis8542model_fit(ibis8542model_results, ibis8542model_resultsobjs):
res2 = m.fit(time=range(2), row=range(3), column=range(4))

assert len(res1) == len(res2) == 2*3*4

for i in range(len(res1)):
assert np.array_equal(res1[i].parameters, res2[i].parameters)
assert res1[i].classification == res2[i].classification
assert res1[i].profile == res2[i].profile
assert res1[i].success == res2[i].success
assert np.array_equal(res1[i].index, res2[i].index)
assert_results_equal(res1, res2)

# # METHOD 3: Test over 4 processing pools
res3 = m.fit(time=range(2), row=range(3), column=range(4), n_pools=4)
