Merge pull request #33 from ConorMacBride/implement-random-state

Implement random_state for IBIS8542Model MLP
ConorMacBride · May 14, 2021 · 9e17b19 · 9e17b19
2 parents 3e42120 + 6d9fde0
commit 9e17b19
Show file tree

Hide file tree

Showing 4 changed files with 103 additions and 14 deletions.
diff --git a/examples/data/ibis8542data/results.fits b/examples/data/ibis8542data/results.fits
diff --git a/examples/gallery/models/plot_ibis8542data.py b/examples/gallery/models/plot_ibis8542data.py
@@ -16,6 +16,8 @@
 # from the GitHub repository where it is hosted.
 # This will create four new files in the current
 # directory (about 651 KB total).
+# **You may need to install the requests
+# Python package for this step to run.**
 
 import requests
 
@@ -115,7 +117,7 @@
 
 import mcalf.models
 
-model = mcalf.models.IBIS8542Model(original_wavelengths=wavelengths)
+model = mcalf.models.IBIS8542Model(original_wavelengths=wavelengths, random_state=0)
 
 model.load_background(backgrounds, ['row', 'column'])
 model.load_array(spectra, ['wavelength', 'row', 'column'])
@@ -211,6 +213,60 @@ def select_training_set(indices, model):
 
 plot_class_map(classifications)
 
+#%%
+# Creating a reproducible classifier
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The neural network classifier introduces a certain amount
+# of randomness when it is fitting based on the training
+# data. This randomness arises in the initial values
+# of the weights and biases that are fitted during the
+# training process, as well as the order in which the
+# training data are used.
+#
+# This means that two neural networks trained on identical
+# data will not produce the same results. To aid the
+# reproducibility of results that rely on a neural
+# network's classifications, a `random_state` integer
+# can be passed to :class:`mcalf.models.IBIS8542Model`
+# as we did above. When we set this value to an integer,
+# no matter how many times we train the neural network
+# on the same data, it will always give the same
+# results.
+#
+# Until better solutions are available to store trained
+# neural networks, a trained neural network can be saved
+# to a Python pickle file and later reloaded. For
+# maximum compatibility, it is recommended to reload
+# into the same version of scikit-learn and its
+# dependencies.
+#
+# The neural network trained above can be saved as follows,
+
+import pickle
+pkl = open('trained_neural_network.pkl', 'wb')
+pickle.dump(model.neural_network, pkl)
+pkl.close()
+
+#%%
+# This trained neural network can then be reloaded at a
+# later date as follows,
+
+import pickle
+pkl = open('trained_neural_network.pkl', 'rb')
+model.neural_network = pickle.load(pkl)  # Overwrite the default untrained model
+
+#%%
+# And you can see that the classifications of spectra are the same,
+
+plot_class_map(model.classify_spectra(row=range(60), column=range(50)))
+
+#%%
+# Please see the
+# `scikit-learn documentation <https://scikit-learn.org/stable/modules/model_persistence.html>`_
+# for more details on model persistence.
+
+
 #%%
 # Fitting the spectra
 # -------------------
@@ -282,16 +338,15 @@ def load_results(file):
 # and columns, and set the number of pools
 # based on the specification of your
 # processor.
-#
-# Results may differ as there is a random
-# factor when training the neural network.
+
+# result_list = model.fit(row=range(60), column=range(50), n_pools=6)
+
+#%%
 # The order of the :class:`mcalf.models.FitResult`
 # objects in this list will also differ as
 # the order that spectra finish fitting in
 # each pool is unpredictable.
 
-# result_list = model.fit(row=range(60), column=range(50), n_pools=6)
-
 #%%
 # Merging the FitResult objects
 # -----------------------------

diff --git a/src/mcalf/models/ibis.py b/src/mcalf/models/ibis.py
@@ -43,6 +43,7 @@ def __init__(self, **kwargs):
             'emission_max_bound',
             'absorption_x_scale',
             'emission_x_scale',
+            'random_state',
         ]  # These must match dictionary in STAGE 1 (defined there as stationary_line_core needs to be set)
         base_kwargs = {k: kwargs[k] for k in kwargs.keys() if k not in class_keys}
         super().__init__(**base_kwargs)
@@ -53,12 +54,6 @@ def __init__(self, **kwargs):
             self.stationary_line_core = 8542.099145376844
         # prefilter_response
         self._set_prefilter()  # Update the prefilter using stationary_line_core
-        # neural_network
-        if self.neural_network is None:
-            mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=(40,), max_iter=1000)
-            parameter_space = {'alpha': [1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 6e-5, 7e-5, 8e-5, 9e-5]}  # Search region
-            # Set GridSearchCV to find best alpha
-            self.neural_network = GridSearchCV(mlp, parameter_space, cv=5, n_jobs=-1)
         # sigma
         if self.sigma is None or (isinstance(self.sigma, bool) and self.sigma):
             self.sigma = [generate_sigma(i, self.constant_wavelengths, self.stationary_line_core) for i in [1, 2]]
@@ -75,13 +70,15 @@ def __init__(self, **kwargs):
             'emission_max_bound': [np.inf, np.inf, 1, 1],
             'absorption_x_scale': [1500, 0.2, 0.3, 0.5],
             'emission_x_scale': [1500, 0.2, 0.3, 0.5],
+            'random_state': None,
         }
         assert defaults.keys() == {k: None for k in class_keys}.keys()  # keys of `defaults` must match `class_keys`
 
         # STAGE 2: Update defaults with any values specified in a config file
         class_defaults = {k: self.config[k] for k in self.config.keys() if k in defaults.keys()}
         for k in class_defaults.keys():
-            if k in ['absorption_x_scale', 'emission_x_scale']:  # These should not need the stationary line core
+            if k in ['absorption_x_scale', 'emission_x_scale', 'random_state']:
+                # These should not need the stationary line core
                 class_defaults[k] = load_parameter(class_defaults[k])
             else:
                 class_defaults[k] = load_parameter(class_defaults[k], wl=self.stationary_line_core)
@@ -105,6 +102,13 @@ def __init__(self, **kwargs):
         # attributes whose default value cannot be changed during initialisation
         self.quiescent_wavelength = 1  # Index of quiescent wavelength in the fitted_parameters
         self.active_wavelength = 5  # Index of active wavelength in the fitted_parameters
+        # neural_network
+        if self.neural_network is None:
+            mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=(40,), max_iter=1000,
+                                random_state=defaults['random_state'])
+            parameter_space = {'alpha': [1e-5, 2e-5, 3e-5, 4e-5, 5e-5, 6e-5, 7e-5, 8e-5, 9e-5]}  # Search region
+            # Set GridSearchCV to find best alpha
+            self.neural_network = GridSearchCV(mlp, parameter_space, cv=5, n_jobs=-1)
 
         # STAGE 5: Validate the loaded attributes
         self._validate_attributes()
@@ -468,7 +472,10 @@ def plot_subtraction(self, *args, **kwargs):
     absorption_x_scale : array_like, length=4, optional, default=[1500, 0.2, 0.3, 0.5]
         Characteristic scale for all the absorption Voigt profile parameters in order of the function's arguments.
     emission_x_scale : array_like, length=4, optional, default=[1500, 0.2, 0.3, 0.5]
-        Characteristic scale for all the emission Voigt profile parameters in order of the function's arguments."""
+        Characteristic scale for all the emission Voigt profile parameters in order of the function's arguments.
+    random_state : int, numpy.random.RandomState, optional, default=None
+        Determines random number generation for weights and bias initialisation of the default `neural_network`.
+        Pass an int for reproducible results across multiple function calls."""
 
 # Form the docstring and do the replacements
 IBIS8542_PARAMETERS_STR = ''.join(IBIS8542_PARAMETERS[i] for i in IBIS8542_PARAMETERS)

diff --git a/src/mcalf/tests/models/test_ibis.py b/src/mcalf/tests/models/test_ibis.py
@@ -4,6 +4,8 @@
 import numpy as np
 import matplotlib.pyplot as plt
 from astropy.io import fits
+from sklearn.model_selection import cross_val_score
+from sklearn.datasets import make_classification
 from sklearn.exceptions import NotFittedError
 
 from mcalf.models import ModelBase, IBIS8542Model, FitResults
@@ -668,3 +670,28 @@ def test_ibis8542model_save(ibis8542model_results, ibis8542model_resultsobjs, tm
         if not diff.identical:  # If this fails tolerances *may* need to be adjusted
             fits.printdiff(saved, truth, **diff_kwargs)
             raise ValueError(f"{saved.filename()} and {truth.filename()} differ")
+
+
+def test_random_state():
+
+    # Testing that the `random_state` kwarg works as expected on the system
+
+    # Arbitrary wavelength points
+    wavelengths = np.linspace(8541, 8544, 49)
+
+    # Initialise model
+    model = IBIS8542Model(original_wavelengths=wavelengths, random_state=0)
+
+    # Get sample classifications
+    X, y = make_classification(200, 49, n_classes=5, n_informative=4, random_state=0)
+
+    # Training #1
+    model.train(X[::2], y[::2])
+    score_a = cross_val_score(model.neural_network, X[1::2], y[1::2])
+
+    # Training #2
+    model.train(X[::2], y[::2])
+    score_b = cross_val_score(model.neural_network, X[1::2], y[1::2])
+
+    assert score_b == pytest.approx(score_a)
+    assert score_b == pytest.approx(np.array([0.45, 0.35, 0.45, 0.45, 0.35]))