Merge pull request #7 from ConorMacBride/model-bugs
Model docstring fixes and additional parameter checks
ConorMacBride committed Jan 5, 2021
2 parents 88d4c59 + 740bec5 commit 4adc463
Showing 3 changed files with 87 additions and 19 deletions.
37 changes: 26 additions & 11 deletions src/mcalf/models/ibis.py
@@ -30,28 +30,29 @@ class IBIS8542Model(ModelBase):
----------
original_wavelengths : array_like
One-dimensional array of wavelengths that correspond to the uncorrected spectral data.
stationary_line_core : float, optional, default = 8542.104320687517
stationary_line_core : float, optional, default = 8542.099145376844
Wavelength of the stationary line core.
absorption_guess : array_like, length=4, optional, default = [-1000, stationary_line_core, 0.2, 0.1]
Initial guess to take when fitting the absorption Voigt profile.
emission_guess : array_like, length=4, optional, default = [1000, stationary_line_core, 0.2, 0.1]
Initial guess to take when fitting the emission Voigt profile.
absorption_min_bound : array_like, length=4, optional, default = [-np.inf, stationary_line_core-0.15, 1e-6, 1e-6]
Minimum bounds for all the absorption Voigt profile parameters in order of the function's arguments.
emission_min_bound : array_like, length=4, optional, default = [0, stationary_line_core-0.15, 1e-6, 1e-6]
emission_min_bound : array_like, length=4, optional, default = [0, -np.inf, 1e-6, 1e-6]
Minimum bounds for all the emission Voigt profile parameters in order of the function's arguments.
absorption_max_bound : array_like, length=4, optional, default = [0, stationary_line_core+0.15, 1, 1]
Maximum bounds for all the absorption Voigt profile parameters in order of the function's arguments.
emission_max_bound : array_like, length=4, optional, default = [np.inf, stationary_line_core+0.15, 1, 1]
emission_max_bound : array_like, length=4, optional, default = [np.inf, np.inf, 1, 1]
Maximum bounds for all the emission Voigt profile parameters in order of the function's arguments.
absorption_x_scale : array_like, length=4, optional, default = [1500, 0.2, 0.3, 0.5]
Characteristic scale for all the absorption Voigt profile parameters in order of the function's arguments.
emission_x_scale : array_like, length=4, optional, default = [1500, 0.2, 0.3, 0.5]
Characteristic scale for all the emission Voigt profile parameters in order of the function's arguments.
neural_network : sklearn.neural_network.MLPClassifier, optional, default = see description
The MLPClassifier object that will be used to classify the spectra. Its default value is
`MLPClassifier(solver='lbfgs', alpha=1e-3, hidden_layer_sizes=(10, 4), random_state=1)`.
constant_wavelengths : array_like, length same as `original_wavelengths`, optional, default = see description
The MLPClassifier object (or similar) that will be used to classify the spectra. Defaults to a `GridSearchCV`
with `MLPClassifier(solver='lbfgs', hidden_layer_sizes=(40,), max_iter=1000)`
for best `alpha` selected from `[1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 6e-5, 7e-5, 8e-5, 9e-5]`.
constant_wavelengths : array_like, ndim=1, optional, default = see description
The desired set of wavelengths that the spectral data should be rescaled to represent. It is assumed
that these have constant spacing, but that may not be a requirement if you specify your own array.
The default value is an array from the minimum to the maximum wavelength of `original_wavelengths` in
@@ -67,7 +68,7 @@ class IBIS8542Model(ModelBase):
it.
prefilter_response : array_like, length=n_wavelengths, optional, default = see note
Each constant wavelength scaled spectrum will be corrected by dividing it by this array. If `prefilter_response`
is not give, and `prefilter_ref_main` and `prefilter_ref_wvscl` are not given, `prefilter_response` will have a
is not given, and `prefilter_ref_main` and `prefilter_ref_wvscl` are not given, `prefilter_response` will have a
default value of `None`.
prefilter_ref_main : array_like, optional, default = None
If `prefilter_response` is not specified, this will be used along with `prefilter_ref_wvscl` to generate the
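For context, the default classifier described in the updated docstring could be constructed roughly as follows. This is an illustrative sketch, not code from this commit; the cross-validation setting is an assumption.

from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

# Base network matching the documented default
mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=(40,), max_iter=1000)

# Grid of alpha values listed in the docstring
parameter_space = {'alpha': [1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 6e-5, 7e-5, 8e-5, 9e-5]}

# GridSearchCV selects the best alpha; cv=5 is assumed, not stated in the docstring
default_neural_network = GridSearchCV(mlp, parameter_space, cv=5)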
@@ -114,7 +115,7 @@ class IBIS8542Model(ModelBase):
The MLPClassifier object (or similar) that will be used to classify the spectra. Defaults to a `GridSearchCV`
with `MLPClassifier(solver='lbfgs', hidden_layer_sizes=(40,), max_iter=1000)`
for best `alpha` selected from `[1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 6e-5, 7e-5, 8e-5, 9e-5]`.
constant_wavelengths : array_like, length same as `original_wavelengths`, optional, default = see description
constant_wavelengths : array_like, ndim=1, optional, default = see description
The desired set of wavelengths that the spectral data should be rescaled to represent. It is assumed
that these have constant spacing, but that may not be a requirement if you specify your own array.
The default value is an array from the minimum to the maximum wavelength of `original_wavelengths` in
@@ -126,7 +127,7 @@ class IBIS8542Model(ModelBase):
See `utils.generate_sigma()` for more information.
prefilter_response : array_like, length=n_wavelengths, optional, default = see note
Each constant wavelength scaled spectrum will be corrected by dividing it by this array. If `prefilter_response`
is not give, and `prefilter_ref_main` and `prefilter_ref_wvscl` are not given, `prefilter_response` will have a
is not given, and `prefilter_ref_main` and `prefilter_ref_wvscl` are not given, `prefilter_response` will have a
default value of `None`.
output : str, optional, default = None
If the program wants to output data, it will place it relative to the location specified by this parameter.
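As a usage illustration, a model could be created with the parameters documented above. This is a minimal sketch, assuming `IBIS8542Model` is imported from `mcalf.models`; the wavelength grid is hypothetical and the keyword values simply repeat the documented defaults.

import numpy as np
from mcalf.models import IBIS8542Model  # assumed import path

# Hypothetical wavelength grid covering the Ca II 8542 line
wavelengths = np.linspace(8541.3, 8542.7, 25)

model = IBIS8542Model(
    original_wavelengths=wavelengths,
    stationary_line_core=8542.099145376844,                 # documented default
    absorption_guess=[-1000, 8542.099145376844, 0.2, 0.1],  # documented default
    emission_guess=[1000, 8542.099145376844, 0.2, 0.1],     # documented default
)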
@@ -539,9 +540,10 @@ def fit(self, time=None, row=None, column=None, spectrum=None, profile=None, sig
The profile to fit. (Will infer profile from `classifications` if omitted.)
sigma : int or array_like, optional, default = None
Explicit sigma index or profile. See `_get_sigma` for details.
classifications : int, optional, default = None
classifications : int or array_like, optional, default = None
Classifications to determine the fitted profile to use (if profile not explicitly given). Will use
neural network to classify them if not.
neural network to classify them if not. If a multidimensional array, must have the same shape as
[`time`, `row`, `column`]. Dimensions that would have length of 1 can be excluded.
background : float, optional, default = None
If provided, this value will be subtracted from the explicit spectrum provided in `spectrum`. Will
not be applied to spectra found from the indices, use the `load_background` method instead.
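The shape rule added to the `classifications` description can be illustrated as follows (illustrative only; assumes spectral data has already been loaded into `model`, as in the tests later in this commit).

import numpy as np

# One classification per spectrum, matching the [time, row, column] layout
classifications = np.zeros((2, 3, 4), dtype=int)
results = model.fit(time=range(2), row=range(3), column=range(4),
                    classifications=classifications)

# Dimensions of length 1 may be excluded, e.g. a (2, 4) array when fitting a single row
results = model.fit(time=range(2), row=range(1), column=range(4),
                    classifications=classifications[:, 0])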
@@ -582,9 +584,22 @@ def fit(self, time=None, row=None, column=None, spectrum=None, profile=None, sig
time, row, column = make_iter(*self._get_time_row_column(time=time, row=row, column=column))
indices = np.transpose(np.array(np.meshgrid(time, row, column, indexing='ij')), axes=[1, 2, 3, 0])

# Ensure that length of arrays match
spectra_indices_shape_mismatch = spectra.shape[:-1] != indices.shape[:-1]
spectra_class_size_mismatch = np.size(spectra[..., 0]) != np.size(classifications)
spectra_class_shape_mismatch = False # Only test this if appropriate
if isinstance(classifications, np.ndarray) and classifications.ndim != 1:
# If a multidimensional array of classifications are given, make sure it matches the indices layout
# Allow for dimensions of length 1 to be excluded
if np.squeeze(spectra[..., 0]).shape != np.squeeze(classifications).shape:
spectra_class_shape_mismatch = True
if spectra_indices_shape_mismatch or spectra_class_size_mismatch or spectra_class_shape_mismatch:
raise ValueError("number classifications do not match number of spectra and associated indices")

# Make shape (n_spectra, n_features) so can process in a list
spectra = spectra.reshape(-1, spectra.shape[-1])
indices = indices.reshape(-1, indices.shape[-1])
classifications = np.asarray(classifications) # Make sure ndarray methods are available
classifications = classifications.reshape(-1)

# Remove spectra that are invalid (this allows for masking of the loaded data to constrain a region to fit)
4 changes: 3 additions & 1 deletion src/mcalf/models/results.py
@@ -15,7 +15,7 @@ class FitResult:
fitted_parameters : ndarray
The parameters fitted.
fit_info : dict
Additional information on the fit including at least 'classification', 'profile', 'success' and 'index'.
Additional information on the fit including at least 'classification', 'profile', 'success', 'chi2' and 'index'.
Attributes
----------
@@ -27,6 +27,8 @@
Profile of the fitted spectrum.
success : bool
Whether the fit was completed successfully.
chi2 : float
Chi-squared value for the fit.
index : list
Index ([<time>, <row>, <column>]) of the spectrum in the spectral array.
__dict__
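For reference, the documented attributes (including the newly listed `chi2`) can be read from a returned result roughly like this. A sketch only, assuming `model` has data loaded and that `fit` returns a list of `FitResult` objects, as the tests below indicate.

result = model.fit(time=0, row=0, column=0)[0]

print(result.profile)     # profile used for the fit
print(result.success)     # whether the fit completed successfully
print(result.chi2)        # chi-squared value, now documented
print(result.index)       # [<time>, <row>, <column>] of the spectrum
print(result.parameters)  # the fitted parameters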
65 changes: 58 additions & 7 deletions tests/models/test_ibis.py
@@ -266,6 +266,63 @@ def ibis8542model_results(ibis8542model_spectra):
return result, m, classifications


def assert_results_equal(res1, res2):
for i in range(len(res1)):
assert np.array_equal(res1[i].parameters, res2[i].parameters)
assert res1[i].classification == res2[i].classification
assert res1[i].profile == res2[i].profile
assert res1[i].success == res2[i].success
assert np.array_equal(res1[i].index, res2[i].index)


def test_ibis8542model_wrong_length_of_classifications(ibis8542model_spectra):

# Load model with random spectra loaded
m, classifications = ibis8542model_spectra

assert classifications.shape == (2, 3, 4) # This is what the test needs to work (verifies fixture)

# Test with too few classifications
c1 = classifications[:, :, [0, 1]]
assert c1.shape == (2, 3, 2)
c2 = classifications[:, [0, 1]]
assert c2.shape == (2, 2, 4)
c3 = classifications[[1]]
assert c3.shape == (1, 3, 4)
c_wrong_shape = np.transpose(classifications) # ...but correct size so would not fail otherwise
assert c_wrong_shape.shape == (4, 3, 2)
for c in [c1, c2, c3, c_wrong_shape]:
with pytest.raises(ValueError) as e:
result = m.fit(time=range(2), row=range(3), column=range(4), classifications=c)
assert 'classifications do not match' in str(e.value)

# Test with too many classifications
for t, r, c in [
# (range(2), range(3), range(4)), # Correct values
(1, range(3), range(4)),
(range(2), range(1, 3), range(4)),
(range(2), range(3), range(2, 4)),
(range(2), range(3), 3),
(0, 1, 2),
]:
with pytest.raises(ValueError) as e:
result = m.fit(time=t, row=r, column=c, classifications=classifications)
assert 'classifications do not match' in str(e.value)

# Test with dimensions of length 1 removed (res1 and res2 should be equivalent)

c = classifications[:, np.newaxis, 0]
assert c.shape == (2, 1, 4)
res1 = m.fit(time=range(2), row=range(1), column=range(4), classifications=c)

c = classifications[:, 0]
assert c.shape == (2, 4)
res2 = m.fit(time=range(2), row=range(1), column=range(4), classifications=c)

assert len(res1) == len(res2) == 2 * 1 * 4
assert_results_equal(res1, res2)


@pytest.fixture()
def ibis8542model_resultsobjs(ibis8542model_results):

@@ -298,13 +355,7 @@ def test_ibis8542model_fit(ibis8542model_results, ibis8542model_resultsobjs):
res2 = m.fit(time=range(2), row=range(3), column=range(4))

assert len(res1) == len(res2) == 2*3*4

for i in range(len(res1)):
assert np.array_equal(res1[i].parameters, res2[i].parameters)
assert res1[i].classification == res2[i].classification
assert res1[i].profile == res2[i].profile
assert res1[i].success == res2[i].success
assert np.array_equal(res1[i].index, res2[i].index)
assert_results_equal(res1, res2)

# # METHOD 3: Test over 4 processing pools
res3 = m.fit(time=range(2), row=range(3), column=range(4), n_pools=4)
