From cd643a470b8bfbcbe0ae7e6564830dc693118d61 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 3 Jul 2022 10:24:47 +0200 Subject: [PATCH 1/2] fix(parser): indentation of description --- .../documentation_parsing/_NumpyDocParser.py | 11 +++++----- .../api/documentation/test_NumpyDocParser.py | 22 ++++++++++++------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/package-parser/package_parser/processing/api/documentation_parsing/_NumpyDocParser.py b/package-parser/package_parser/processing/api/documentation_parsing/_NumpyDocParser.py index 043a89570..aa73ea427 100644 --- a/package-parser/package_parser/processing/api/documentation_parsing/_NumpyDocParser.py +++ b/package-parser/package_parser/processing/api/documentation_parsing/_NumpyDocParser.py @@ -64,8 +64,7 @@ def get_parameter_documentation( else: docstring = get_full_docstring(function_node) - # Find matching parameter docstrings. Numpydoc allows multiple parameters to be documented at once. See - # https://numpydoc.readthedocs.io/en/latest/format.html#parameters for more information. + # Find matching parameter docstrings function_numpydoc = self.__get_cached_function_numpydoc_string( function_node, docstring ) @@ -89,7 +88,7 @@ def get_parameter_documentation( type=type_, default_value=default_value, description="\n".join( - [line.strip() for line in last_parameter_numpydoc.desc] + [line.rstrip() for line in last_parameter_numpydoc.desc] ), ) @@ -122,9 +121,9 @@ def _get_description(numpydoc_string: NumpyDocString) -> str: extended_summary: list[str] = numpydoc_string.get("Extended Summary", []) result = "" - result += "\n".join([line.strip() for line in summary]) + result += "\n".join([line.rstrip() for line in summary]) result += "\n\n" - result += "\n".join([line.strip() for line in extended_summary]) + result += "\n".join([line.rstrip() for line in extended_summary]) return result.strip() @@ -144,6 +143,8 @@ def _is_matching_parameter_numpydoc( else: lookup_name = parameter_name + # Numpydoc allows multiple parameters to be documented at once. See + # https://numpydoc.readthedocs.io/en/latest/format.html#parameters for more information. return any( name.strip() == lookup_name for name in parameter_numpydoc.name.split(",") ) diff --git a/package-parser/tests/processing/api/documentation/test_NumpyDocParser.py b/package-parser/tests/processing/api/documentation/test_NumpyDocParser.py index cbecaab2d..0c6d931a7 100644 --- a/package-parser/tests/processing/api/documentation/test_NumpyDocParser.py +++ b/package-parser/tests/processing/api/documentation/test_NumpyDocParser.py @@ -18,7 +18,9 @@ def numpydoc_parser() -> NumpyDocParser: class_with_documentation = ''' class C: """ - Lorem ipsum. + Lorem ipsum. Code:: + + pass Dolor sit amet. """ @@ -37,8 +39,8 @@ class C: ( class_with_documentation, ClassDocumentation( - description="Lorem ipsum.\n\nDolor sit amet.", - full_docstring="Lorem ipsum.\n\nDolor sit amet.", + description="Lorem ipsum. Code::\n\n pass\n\nDolor sit amet.", + full_docstring="Lorem ipsum. Code::\n\n pass\n\nDolor sit amet.", ), ), ( @@ -66,7 +68,9 @@ def test_get_class_documentation( function_with_documentation = ''' def f(): """ - Lorem ipsum. + Lorem ipsum. Code:: + + pass Dolor sit amet. """ @@ -87,8 +91,8 @@ def f(): ( function_with_documentation, FunctionDocumentation( - description="Lorem ipsum.\n\nDolor sit amet.", - full_docstring="Lorem ipsum.\n\nDolor sit amet.", + description="Lorem ipsum. Code::\n\n pass\n\nDolor sit amet.", + full_docstring="Lorem ipsum. 
Code::\n\n pass\n\nDolor sit amet.", ), ), ( @@ -146,7 +150,9 @@ def f(): Parameters ---------- no_type_no_default - foo: no_type_no_default + foo: no_type_no_default. Code:: + + pass type_no_default : int foo: type_no_default optional_unknown_default : int, optional @@ -199,7 +205,7 @@ def f(): ParameterDocumentation( type="", default_value="", - description="foo: no_type_no_default", + description="foo: no_type_no_default. Code::\n\n pass", ), ), ( From 80ba3687c8333b2f4b729ae7e8b949e5ef99fbd1 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sun, 3 Jul 2022 10:31:54 +0200 Subject: [PATCH 2/2] chore(data): update API data --- data/api/sklearn__api.json | 1944 ++++++++++++++++++------------------ 1 file changed, 972 insertions(+), 972 deletions(-) diff --git a/data/api/sklearn__api.json b/data/api/sklearn__api.json index 924bbad8e..8f43ec824 100644 --- a/data/api/sklearn__api.json +++ b/data/api/sklearn__api.json @@ -20537,7 +20537,7 @@ ], "is_public": false, "reexported_by": [], - "description": "Base class for reproductive Exponential Dispersion Models (EDM).\n\nThe pdf of :math:`Y\\sim \\mathrm{EDM}(y_\\textrm{pred}, \\phi)` is given by\n\n.. math:: p(y| \\theta, \\phi) = c(y, \\phi)\n\\exp\\left(\\frac{\\theta y-A(\\theta)}{\\phi}\\right)\n= \\tilde{c}(y, \\phi)\n\\exp\\left(-\\frac{d(y, y_\\textrm{pred})}{2\\phi}\\right)\n\nwith mean :math:`\\mathrm{E}[Y] = A'(\\theta) = y_\\textrm{pred}`,\nvariance :math:`\\mathrm{Var}[Y] = \\phi \\cdot v(y_\\textrm{pred})`,\nunit variance :math:`v(y_\\textrm{pred})` and\nunit deviance :math:`d(y,y_\\textrm{pred})`.", + "description": "Base class for reproductive Exponential Dispersion Models (EDM).\n\nThe pdf of :math:`Y\\sim \\mathrm{EDM}(y_\\textrm{pred}, \\phi)` is given by\n\n.. math:: p(y| \\theta, \\phi) = c(y, \\phi)\n \\exp\\left(\\frac{\\theta y-A(\\theta)}{\\phi}\\right)\n = \\tilde{c}(y, \\phi)\n \\exp\\left(-\\frac{d(y, y_\\textrm{pred})}{2\\phi}\\right)\n\nwith mean :math:`\\mathrm{E}[Y] = A'(\\theta) = y_\\textrm{pred}`,\nvariance :math:`\\mathrm{Var}[Y] = \\phi \\cdot v(y_\\textrm{pred})`,\nunit variance :math:`v(y_\\textrm{pred})` and\nunit deviance :math:`d(y,y_\\textrm{pred})`.", "docstring": "Base class for reproductive Exponential Dispersion Models (EDM).\n\nThe pdf of :math:`Y\\sim \\mathrm{EDM}(y_\\textrm{pred}, \\phi)` is given by\n\n.. math:: p(y| \\theta, \\phi) = c(y, \\phi)\n \\exp\\left(\\frac{\\theta y-A(\\theta)}{\\phi}\\right)\n = \\tilde{c}(y, \\phi)\n \\exp\\left(-\\frac{d(y, y_\\textrm{pred})}{2\\phi}\\right)\n\nwith mean :math:`\\mathrm{E}[Y] = A'(\\theta) = y_\\textrm{pred}`,\nvariance :math:`\\mathrm{Var}[Y] = \\phi \\cdot v(y_\\textrm{pred})`,\nunit variance :math:`v(y_\\textrm{pred})` and\nunit deviance :math:`d(y,y_\\textrm{pred})`.\n\nMethods\n-------\ndeviance\ndeviance_derivative\nin_y_range\nunit_deviance\nunit_deviance_derivative\nunit_variance\n\nReferences\n----------\nhttps://en.wikipedia.org/wiki/Exponential_dispersion_model." 
}, { @@ -20615,7 +20615,7 @@ "methods": ["sklearn/sklearn._loss.link/BaseLink/link", "sklearn/sklearn._loss.link/BaseLink/inverse"], "is_public": false, "reexported_by": [], - "description": "Abstract base class for differentiable, invertible link functions.\n\nConvention:\n- link function g: raw_prediction = g(y_pred)\n- inverse link h: y_pred = h(raw_prediction)\n\nFor (generalized) linear models, `raw_prediction = X @ coef` is the so\ncalled linear predictor, and `y_pred = h(raw_prediction)` is the predicted\nconditional (on X) expected value of the target `y_true`.\n\nThe methods are not implemented as staticmethods in case a link function needs\nparameters.", + "description": "Abstract base class for differentiable, invertible link functions.\n\nConvention:\n - link function g: raw_prediction = g(y_pred)\n - inverse link h: y_pred = h(raw_prediction)\n\nFor (generalized) linear models, `raw_prediction = X @ coef` is the so\ncalled linear predictor, and `y_pred = h(raw_prediction)` is the predicted\nconditional (on X) expected value of the target `y_true`.\n\nThe methods are not implemented as staticmethods in case a link function needs\nparameters.", "docstring": "Abstract base class for differentiable, invertible link functions.\n\nConvention:\n - link function g: raw_prediction = g(y_pred)\n - inverse link h: y_pred = h(raw_prediction)\n\nFor (generalized) linear models, `raw_prediction = X @ coef` is the so\ncalled linear predictor, and `y_pred = h(raw_prediction)` is the predicted\nconditional (on X) expected value of the target `y_true`.\n\nThe methods are not implemented as staticmethods in case a link function needs\nparameters." }, { @@ -20682,7 +20682,7 @@ ], "is_public": false, "reexported_by": [], - "description": "The symmetric multinomial logit function.\n\nConvention:\n- y_pred.shape = raw_prediction.shape = (n_samples, n_classes)\n\nNotes:\n- The inverse link h is the softmax function.\n- The sum is over the second axis, i.e. axis=1 (n_classes).\n\nWe have to choose additional constraints in order to make\n\ny_pred[k] = exp(raw_pred[k]) / sum(exp(raw_pred[k]), k=0..n_classes-1)\n\nfor n_classes classes identifiable and invertible.\nWe choose the symmetric side constraint where the geometric mean response\nis set as reference category, see [2]:\n\nThe symmetric multinomial logit link function for a single data point is\nthen defined as\n\nraw_prediction[k] = g(y_pred[k]) = log(y_pred[k]/gmean(y_pred))\n= log(y_pred[k]) - mean(log(y_pred)).\n\nNote that this is equivalent to the definition in [1] and implies mean\ncentered raw predictions:\n\nsum(raw_prediction[k], k=0..n_classes-1) = 0.\n\nFor linear models with raw_prediction = X @ coef, this corresponds to\nsum(coef[k], k=0..n_classes-1) = 0, i.e. the sum over classes for every\nfeature is zero.", + "description": "The symmetric multinomial logit function.\n\nConvention:\n - y_pred.shape = raw_prediction.shape = (n_samples, n_classes)\n\nNotes:\n - The inverse link h is the softmax function.\n - The sum is over the second axis, i.e. 
axis=1 (n_classes).\n\nWe have to choose additional constraints in order to make\n\n y_pred[k] = exp(raw_pred[k]) / sum(exp(raw_pred[k]), k=0..n_classes-1)\n\nfor n_classes classes identifiable and invertible.\nWe choose the symmetric side constraint where the geometric mean response\nis set as reference category, see [2]:\n\nThe symmetric multinomial logit link function for a single data point is\nthen defined as\n\n raw_prediction[k] = g(y_pred[k]) = log(y_pred[k]/gmean(y_pred))\n = log(y_pred[k]) - mean(log(y_pred)).\n\nNote that this is equivalent to the definition in [1] and implies mean\ncentered raw predictions:\n\n sum(raw_prediction[k], k=0..n_classes-1) = 0.\n\nFor linear models with raw_prediction = X @ coef, this corresponds to\nsum(coef[k], k=0..n_classes-1) = 0, i.e. the sum over classes for every\nfeature is zero.", "docstring": "The symmetric multinomial logit function.\n\nConvention:\n - y_pred.shape = raw_prediction.shape = (n_samples, n_classes)\n\nNotes:\n - The inverse link h is the softmax function.\n - The sum is over the second axis, i.e. axis=1 (n_classes).\n\nWe have to choose additional constraints in order to make\n\n y_pred[k] = exp(raw_pred[k]) / sum(exp(raw_pred[k]), k=0..n_classes-1)\n\nfor n_classes classes identifiable and invertible.\nWe choose the symmetric side constraint where the geometric mean response\nis set as reference category, see [2]:\n\nThe symmetric multinomial logit link function for a single data point is\nthen defined as\n\n raw_prediction[k] = g(y_pred[k]) = log(y_pred[k]/gmean(y_pred))\n = log(y_pred[k]) - mean(log(y_pred)).\n\nNote that this is equivalent to the definition in [1] and implies mean\ncentered raw predictions:\n\n sum(raw_prediction[k], k=0..n_classes-1) = 0.\n\nFor linear models with raw_prediction = X @ coef, this corresponds to\nsum(coef[k], k=0..n_classes-1) = 0, i.e. the sum over classes for every\nfeature is zero.\n\nReference\n---------\n.. [1] Friedman, Jerome; Hastie, Trevor; Tibshirani, Robert. \"Additive\n logistic regression: a statistical view of boosting\" Ann. Statist.\n 28 (2000), no. 2, 337--407. doi:10.1214/aos/1016218223.\n https://projecteuclid.org/euclid.aos/1016218223\n\n.. [2] Zahid, Faisal Maqbool and Gerhard Tutz. 
\"Ridge estimation for\n multinomial logit models with symmetric side constraints.\"\n Computational Statistics 28 (2013): 1017-1034.\n http://epub.ub.uni-muenchen.de/11001/1/tr067.pdf" }, { @@ -20697,7 +20697,7 @@ ], "is_public": false, "reexported_by": [], - "description": "Absolute error with identity link, for regression.\n\nDomain:\ny_true and y_pred all real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, the absolute error is defined as::\n\nloss(x_i) = |y_true_i - raw_prediction_i|", + "description": "Absolute error with identity link, for regression.\n\nDomain:\ny_true and y_pred all real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, the absolute error is defined as::\n\n loss(x_i) = |y_true_i - raw_prediction_i|", "docstring": "Absolute error with identity link, for regression.\n\nDomain:\ny_true and y_pred all real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, the absolute error is defined as::\n\n loss(x_i) = |y_true_i - raw_prediction_i|" }, { @@ -20721,7 +20721,7 @@ ], "is_public": false, "reexported_by": [], - "description": "Base class for a loss function of 1-dimensional targets.\n\nConventions:\n\n- y_true.shape = sample_weight.shape = (n_samples,)\n- y_pred.shape = raw_prediction.shape = (n_samples,)\n- If is_multiclass is true (multiclass classification), then\ny_pred.shape = raw_prediction.shape = (n_samples, n_classes)\nNote that this corresponds to the return value of decision_function.\n\ny_true, y_pred, sample_weight and raw_prediction must either be all float64\nor all float32.\ngradient and hessian must be either both float64 or both float32.\n\nNote that y_pred = link.inverse(raw_prediction).\n\nSpecific loss classes can inherit specific link classes to satisfy\nBaseLink's abstractmethods.", + "description": "Base class for a loss function of 1-dimensional targets.\n\nConventions:\n\n - y_true.shape = sample_weight.shape = (n_samples,)\n - y_pred.shape = raw_prediction.shape = (n_samples,)\n - If is_multiclass is true (multiclass classification), then\n y_pred.shape = raw_prediction.shape = (n_samples, n_classes)\n Note that this corresponds to the return value of decision_function.\n\ny_true, y_pred, sample_weight and raw_prediction must either be all float64\nor all float32.\ngradient and hessian must be either both float64 or both float32.\n\nNote that y_pred = link.inverse(raw_prediction).\n\nSpecific loss classes can inherit specific link classes to satisfy\nBaseLink's abstractmethods.", "docstring": "Base class for a loss function of 1-dimensional targets.\n\nConventions:\n\n - y_true.shape = sample_weight.shape = (n_samples,)\n - y_pred.shape = raw_prediction.shape = (n_samples,)\n - If is_multiclass is true (multiclass classification), then\n y_pred.shape = raw_prediction.shape = (n_samples, n_classes)\n Note that this corresponds to the return value of decision_function.\n\ny_true, y_pred, sample_weight and raw_prediction must either be all float64\nor all float32.\ngradient and hessian must be either both float64 or both float32.\n\nNote that y_pred = link.inverse(raw_prediction).\n\nSpecific loss classes can inherit specific link classes to satisfy\nBaseLink's abstractmethods.\n\nParameters\n----------\nsample_weight : {None, ndarray}\n If sample_weight is None, the hessian might be constant.\nn_classes : {None, int}\n The number of classes for classification, else None.\n\nAttributes\n----------\ncloss: CyLossFunction\nlink : BaseLink\ninterval_y_true : Interval\n Valid interval for 
y_true\ninterval_y_pred : Interval\n Valid Interval for y_pred\ndifferentiable : bool\n Indicates whether or not loss function is differentiable in\n raw_prediction everywhere.\nneed_update_leaves_values : bool\n Indicates whether decision trees in gradient boosting need to uptade\n leave values after having been fit to the (negative) gradients.\napprox_hessian : bool\n Indicates whether the hessian is approximated or exact. If,\n approximated, it should be larger or equal to the exact one.\nconstant_hessian : bool\n Indicates whether the hessian is one for this loss.\nis_multiclass : bool\n Indicates whether n_classes > 2 is allowed." }, { @@ -20737,7 +20737,7 @@ ], "is_public": false, "reexported_by": [], - "description": "Half Binomial deviance loss with logit link, for binary classification.\n\nThis is also know as binary cross entropy, log-loss and logistic loss.\n\nDomain:\ny_true in [0, 1], i.e. regression on the unit interval\ny_pred in (0, 1), i.e. boundaries excluded\n\nLink:\ny_pred = expit(raw_prediction)\n\nFor a given sample x_i, half Binomial deviance is defined as the negative\nlog-likelihood of the Binomial/Bernoulli distribution and can be expressed\nas::\n\nloss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i\n\nSee The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,\nsection 4.4.1 (about logistic regression).\n\nNote that the formulation works for classification, y = {0, 1}, as well as\nlogistic regression, y = [0, 1].\nIf you add `constant_to_optimal_zero` to the loss, you get half the\nBernoulli/binomial deviance.", + "description": "Half Binomial deviance loss with logit link, for binary classification.\n\nThis is also know as binary cross entropy, log-loss and logistic loss.\n\nDomain:\ny_true in [0, 1], i.e. regression on the unit interval\ny_pred in (0, 1), i.e. boundaries excluded\n\nLink:\ny_pred = expit(raw_prediction)\n\nFor a given sample x_i, half Binomial deviance is defined as the negative\nlog-likelihood of the Binomial/Bernoulli distribution and can be expressed\nas::\n\n loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i\n\nSee The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,\nsection 4.4.1 (about logistic regression).\n\nNote that the formulation works for classification, y = {0, 1}, as well as\nlogistic regression, y = [0, 1].\nIf you add `constant_to_optimal_zero` to the loss, you get half the\nBernoulli/binomial deviance.", "docstring": "Half Binomial deviance loss with logit link, for binary classification.\n\nThis is also know as binary cross entropy, log-loss and logistic loss.\n\nDomain:\ny_true in [0, 1], i.e. regression on the unit interval\ny_pred in (0, 1), i.e. boundaries excluded\n\nLink:\ny_pred = expit(raw_prediction)\n\nFor a given sample x_i, half Binomial deviance is defined as the negative\nlog-likelihood of the Binomial/Bernoulli distribution and can be expressed\nas::\n\n loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i\n\nSee The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,\nsection 4.4.1 (about logistic regression).\n\nNote that the formulation works for classification, y = {0, 1}, as well as\nlogistic regression, y = [0, 1].\nIf you add `constant_to_optimal_zero` to the loss, you get half the\nBernoulli/binomial deviance." 
}, { @@ -20752,7 +20752,7 @@ ], "is_public": false, "reexported_by": [], - "description": "Half Gamma deviance loss with log-link, for regression.\n\nDomain:\ny_true and y_pred in positive real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half Gamma deviance loss is defined as::\n\nloss(x_i) = log(exp(raw_prediction_i)/y_true_i)\n+ y_true/exp(raw_prediction_i) - 1\n\nHalf the Gamma deviance is actually proportional to the negative log-\nlikelihood up to constant terms (not involving raw_prediction) and\nsimplifies the computation of the gradients.\nWe also skip the constant term `-log(y_true_i) - 1`.", + "description": "Half Gamma deviance loss with log-link, for regression.\n\nDomain:\ny_true and y_pred in positive real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half Gamma deviance loss is defined as::\n\n loss(x_i) = log(exp(raw_prediction_i)/y_true_i)\n + y_true/exp(raw_prediction_i) - 1\n\nHalf the Gamma deviance is actually proportional to the negative log-\nlikelihood up to constant terms (not involving raw_prediction) and\nsimplifies the computation of the gradients.\nWe also skip the constant term `-log(y_true_i) - 1`.", "docstring": "Half Gamma deviance loss with log-link, for regression.\n\nDomain:\ny_true and y_pred in positive real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half Gamma deviance loss is defined as::\n\n loss(x_i) = log(exp(raw_prediction_i)/y_true_i)\n + y_true/exp(raw_prediction_i) - 1\n\nHalf the Gamma deviance is actually proportional to the negative log-\nlikelihood up to constant terms (not involving raw_prediction) and\nsimplifies the computation of the gradients.\nWe also skip the constant term `-log(y_true_i) - 1`." }, { @@ -20770,7 +20770,7 @@ ], "is_public": false, "reexported_by": [], - "description": "Categorical cross-entropy loss, for multiclass classification.\n\nDomain:\ny_true in {0, 1, 2, 3, .., n_classes - 1}\ny_pred has n_classes elements, each element in (0, 1)\n\nLink:\ny_pred = softmax(raw_prediction)\n\nNote: We assume y_true to be already label encoded. The inverse link is\nsoftmax. But the full link function is the symmetric multinomial logit\nfunction.\n\nFor a given sample x_i, the categorical cross-entropy loss is defined as\nthe negative log-likelihood of the multinomial distribution, it\ngeneralizes the binary cross-entropy to more than 2 classes::\n\nloss_i = log(sum(exp(raw_pred_{i, k}), k=0..n_classes-1))\n- sum(y_true_{i, k} * raw_pred_{i, k}, k=0..n_classes-1)\n\nSee [1].\n\nNote that for the hessian, we calculate only the diagonal part in the\nclasses: If the full hessian for classes k and l and sample i is H_i_k_l,\nwe calculate H_i_k_k, i.e. k=l.", + "description": "Categorical cross-entropy loss, for multiclass classification.\n\nDomain:\ny_true in {0, 1, 2, 3, .., n_classes - 1}\ny_pred has n_classes elements, each element in (0, 1)\n\nLink:\ny_pred = softmax(raw_prediction)\n\nNote: We assume y_true to be already label encoded. The inverse link is\nsoftmax. 
But the full link function is the symmetric multinomial logit\nfunction.\n\nFor a given sample x_i, the categorical cross-entropy loss is defined as\nthe negative log-likelihood of the multinomial distribution, it\ngeneralizes the binary cross-entropy to more than 2 classes::\n\n loss_i = log(sum(exp(raw_pred_{i, k}), k=0..n_classes-1))\n - sum(y_true_{i, k} * raw_pred_{i, k}, k=0..n_classes-1)\n\nSee [1].\n\nNote that for the hessian, we calculate only the diagonal part in the\nclasses: If the full hessian for classes k and l and sample i is H_i_k_l,\nwe calculate H_i_k_k, i.e. k=l.", "docstring": "Categorical cross-entropy loss, for multiclass classification.\n\nDomain:\ny_true in {0, 1, 2, 3, .., n_classes - 1}\ny_pred has n_classes elements, each element in (0, 1)\n\nLink:\ny_pred = softmax(raw_prediction)\n\nNote: We assume y_true to be already label encoded. The inverse link is\nsoftmax. But the full link function is the symmetric multinomial logit\nfunction.\n\nFor a given sample x_i, the categorical cross-entropy loss is defined as\nthe negative log-likelihood of the multinomial distribution, it\ngeneralizes the binary cross-entropy to more than 2 classes::\n\n loss_i = log(sum(exp(raw_pred_{i, k}), k=0..n_classes-1))\n - sum(y_true_{i, k} * raw_pred_{i, k}, k=0..n_classes-1)\n\nSee [1].\n\nNote that for the hessian, we calculate only the diagonal part in the\nclasses: If the full hessian for classes k and l and sample i is H_i_k_l,\nwe calculate H_i_k_k, i.e. k=l.\n\nReference\n---------\n.. [1] :arxiv:`Simon, Noah, J. Friedman and T. Hastie.\n \"A Blockwise Descent Algorithm for Group-penalized Multiresponse and\n Multinomial Regression\".\n <1311.6529>`" }, { @@ -20785,7 +20785,7 @@ ], "is_public": false, "reexported_by": [], - "description": "Half Poisson deviance loss with log-link, for regression.\n\nDomain:\ny_true in non-negative real numbers\ny_pred in positive real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half the Poisson deviance is defined as::\n\nloss(x_i) = y_true_i * log(y_true_i/exp(raw_prediction_i))\n- y_true_i + exp(raw_prediction_i)\n\nHalf the Poisson deviance is actually the negative log-likelihood up to\nconstant terms (not involving raw_prediction) and simplifies the\ncomputation of the gradients.\nWe also skip the constant term `y_true_i * log(y_true_i) - y_true_i`.", + "description": "Half Poisson deviance loss with log-link, for regression.\n\nDomain:\ny_true in non-negative real numbers\ny_pred in positive real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half the Poisson deviance is defined as::\n\n loss(x_i) = y_true_i * log(y_true_i/exp(raw_prediction_i))\n - y_true_i + exp(raw_prediction_i)\n\nHalf the Poisson deviance is actually the negative log-likelihood up to\nconstant terms (not involving raw_prediction) and simplifies the\ncomputation of the gradients.\nWe also skip the constant term `y_true_i * log(y_true_i) - y_true_i`.", "docstring": "Half Poisson deviance loss with log-link, for regression.\n\nDomain:\ny_true in non-negative real numbers\ny_pred in positive real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half the Poisson deviance is defined as::\n\n loss(x_i) = y_true_i * log(y_true_i/exp(raw_prediction_i))\n - y_true_i + exp(raw_prediction_i)\n\nHalf the Poisson deviance is actually the negative log-likelihood up to\nconstant terms (not involving raw_prediction) and simplifies the\ncomputation of the gradients.\nWe also skip the constant term 
`y_true_i * log(y_true_i) - y_true_i`." }, { @@ -20797,7 +20797,7 @@ "methods": ["sklearn/sklearn._loss.loss/HalfSquaredError/__init__"], "is_public": false, "reexported_by": [], - "description": "Half squared error with identity link, for regression.\n\nDomain:\ny_true and y_pred all real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, half squared error is defined as::\n\nloss(x_i) = 0.5 * (y_true_i - raw_prediction_i)**2\n\nThe factor of 0.5 simplifies the computation of gradients and results in a\nunit hessian (and is consistent with what is done in LightGBM). It is also\nhalf the Normal distribution deviance.", + "description": "Half squared error with identity link, for regression.\n\nDomain:\ny_true and y_pred all real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, half squared error is defined as::\n\n loss(x_i) = 0.5 * (y_true_i - raw_prediction_i)**2\n\nThe factor of 0.5 simplifies the computation of gradients and results in a\nunit hessian (and is consistent with what is done in LightGBM). It is also\nhalf the Normal distribution deviance.", "docstring": "Half squared error with identity link, for regression.\n\nDomain:\ny_true and y_pred all real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, half squared error is defined as::\n\n loss(x_i) = 0.5 * (y_true_i - raw_prediction_i)**2\n\nThe factor of 0.5 simplifies the computation of gradients and results in a\nunit hessian (and is consistent with what is done in LightGBM). It is also\nhalf the Normal distribution deviance." }, { @@ -20812,7 +20812,7 @@ ], "is_public": false, "reexported_by": [], - "description": "Half Tweedie deviance loss with log-link, for regression.\n\nDomain:\ny_true in real numbers for power <= 0\ny_true in non-negative real numbers for 0 < power < 2\ny_true in positive real numbers for 2 <= power\ny_pred in positive real numbers\npower in real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half Tweedie deviance loss with p=power is defined\nas::\n\nloss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n- y_true_i * exp(raw_prediction_i)**(1-p) / (1-p)\n+ exp(raw_prediction_i)**(2-p) / (2-p)\n\nTaking the limits for p=0, 1, 2 gives HalfSquaredError with a log link,\nHalfPoissonLoss and HalfGammaLoss.\n\nWe also skip constant terms, but those are different for p=0, 1, 2.\nTherefore, the loss is not continuous in `power`.\n\nNote furthermore that although no Tweedie distribution exists for\n0 < power < 1, it still gives a strictly consistent scoring function for\nthe expectation.", + "description": "Half Tweedie deviance loss with log-link, for regression.\n\nDomain:\ny_true in real numbers for power <= 0\ny_true in non-negative real numbers for 0 < power < 2\ny_true in positive real numbers for 2 <= power\ny_pred in positive real numbers\npower in real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half Tweedie deviance loss with p=power is defined\nas::\n\n loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n - y_true_i * exp(raw_prediction_i)**(1-p) / (1-p)\n + exp(raw_prediction_i)**(2-p) / (2-p)\n\nTaking the limits for p=0, 1, 2 gives HalfSquaredError with a log link,\nHalfPoissonLoss and HalfGammaLoss.\n\nWe also skip constant terms, but those are different for p=0, 1, 2.\nTherefore, the loss is not continuous in `power`.\n\nNote furthermore that although no Tweedie distribution exists for\n0 < power < 1, it still gives a strictly consistent scoring function for\nthe expectation.", 
"docstring": "Half Tweedie deviance loss with log-link, for regression.\n\nDomain:\ny_true in real numbers for power <= 0\ny_true in non-negative real numbers for 0 < power < 2\ny_true in positive real numbers for 2 <= power\ny_pred in positive real numbers\npower in real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half Tweedie deviance loss with p=power is defined\nas::\n\n loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n - y_true_i * exp(raw_prediction_i)**(1-p) / (1-p)\n + exp(raw_prediction_i)**(2-p) / (2-p)\n\nTaking the limits for p=0, 1, 2 gives HalfSquaredError with a log link,\nHalfPoissonLoss and HalfGammaLoss.\n\nWe also skip constant terms, but those are different for p=0, 1, 2.\nTherefore, the loss is not continuous in `power`.\n\nNote furthermore that although no Tweedie distribution exists for\n0 < power < 1, it still gives a strictly consistent scoring function for\nthe expectation." }, { @@ -20824,7 +20824,7 @@ "methods": ["sklearn/sklearn._loss.loss/HalfTweedieLossIdentity/__init__"], "is_public": false, "reexported_by": [], - "description": "Half Tweedie deviance loss with identity link, for regression.\n\nDomain:\ny_true in real numbers for power <= 0\ny_true in non-negative real numbers for 0 < power < 2\ny_true in positive real numbers for 2 <= power\ny_pred in positive real numbers for power != 0\ny_pred in real numbers for power = 0\npower in real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, half Tweedie deviance loss with p=power is defined\nas::\n\nloss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n- y_true_i * raw_prediction_i**(1-p) / (1-p)\n+ raw_prediction_i**(2-p) / (2-p)\n\nNote that the minimum value of this loss is 0.\n\nNote furthermore that although no Tweedie distribution exists for\n0 < power < 1, it still gives a strictly consistent scoring function for\nthe expectation.", + "description": "Half Tweedie deviance loss with identity link, for regression.\n\nDomain:\ny_true in real numbers for power <= 0\ny_true in non-negative real numbers for 0 < power < 2\ny_true in positive real numbers for 2 <= power\ny_pred in positive real numbers for power != 0\ny_pred in real numbers for power = 0\npower in real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, half Tweedie deviance loss with p=power is defined\nas::\n\n loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n - y_true_i * raw_prediction_i**(1-p) / (1-p)\n + raw_prediction_i**(2-p) / (2-p)\n\nNote that the minimum value of this loss is 0.\n\nNote furthermore that although no Tweedie distribution exists for\n0 < power < 1, it still gives a strictly consistent scoring function for\nthe expectation.", "docstring": "Half Tweedie deviance loss with identity link, for regression.\n\nDomain:\ny_true in real numbers for power <= 0\ny_true in non-negative real numbers for 0 < power < 2\ny_true in positive real numbers for 2 <= power\ny_pred in positive real numbers for power != 0\ny_pred in real numbers for power = 0\npower in real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, half Tweedie deviance loss with p=power is defined\nas::\n\n loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n - y_true_i * raw_prediction_i**(1-p) / (1-p)\n + raw_prediction_i**(2-p) / (2-p)\n\nNote that the minimum value of this loss is 0.\n\nNote furthermore that although no Tweedie distribution exists for\n0 < power < 1, it still gives a strictly consistent scoring function for\nthe expectation." 
}, { @@ -20839,7 +20839,7 @@ ], "is_public": false, "reexported_by": [], - "description": "Quantile loss aka pinball loss, for regression.\n\nDomain:\ny_true and y_pred all real numbers\nquantile in (0, 1)\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, the pinball loss is defined as::\n\nloss(x_i) = rho_{quantile}(y_true_i - raw_prediction_i)\n\nrho_{quantile}(u) = u * (quantile - 1_{u<0})\n= -u *(1 - quantile) if u < 0\nu * quantile if u >= 0\n\nNote: 2 * PinballLoss(quantile=0.5) equals AbsoluteError().", + "description": "Quantile loss aka pinball loss, for regression.\n\nDomain:\ny_true and y_pred all real numbers\nquantile in (0, 1)\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, the pinball loss is defined as::\n\n loss(x_i) = rho_{quantile}(y_true_i - raw_prediction_i)\n\n rho_{quantile}(u) = u * (quantile - 1_{u<0})\n = -u *(1 - quantile) if u < 0\n u * quantile if u >= 0\n\nNote: 2 * PinballLoss(quantile=0.5) equals AbsoluteError().", "docstring": "Quantile loss aka pinball loss, for regression.\n\nDomain:\ny_true and y_pred all real numbers\nquantile in (0, 1)\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, the pinball loss is defined as::\n\n loss(x_i) = rho_{quantile}(y_true_i - raw_prediction_i)\n\n rho_{quantile}(u) = u * (quantile - 1_{u<0})\n = -u *(1 - quantile) if u < 0\n u * quantile if u >= 0\n\nNote: 2 * PinballLoss(quantile=0.5) equals AbsoluteError().\n\nAdditional Attributes\n---------------------\nquantile : float\n The quantile to be estimated. Must be in range (0, 1)." }, { @@ -21438,7 +21438,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.cluster"], - "description": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex, or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster, such as when clusters are\nnested circles on the 2D plane.\n\nIf the affinity matrix is the adjacency matrix of a graph, this method\ncan be used to find normalized graph cuts [1]_, [2]_.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\na kernel function such the Gaussian (aka RBF) kernel with Euclidean\ndistance ``d(X, X)``::\n\nnp.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, a user-provided affinity matrix can be specified by\nsetting ``affinity='precomputed'``.\n\nRead more in the :ref:`User Guide `.", + "description": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex, or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster, such as when clusters are\nnested circles on the 2D plane.\n\nIf the affinity matrix is the adjacency matrix of a graph, this method\ncan be used to find normalized graph cuts [1]_, [2]_.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\na kernel function such the Gaussian (aka RBF) kernel with Euclidean\ndistance ``d(X, X)``::\n\n np.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, a user-provided affinity matrix can be specified by\nsetting ``affinity='precomputed'``.\n\nRead more in the :ref:`User Guide `.", "docstring": "Apply clustering to a projection of the normalized 
Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex, or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster, such as when clusters are\nnested circles on the 2D plane.\n\nIf the affinity matrix is the adjacency matrix of a graph, this method\ncan be used to find normalized graph cuts [1]_, [2]_.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\na kernel function such the Gaussian (aka RBF) kernel with Euclidean\ndistance ``d(X, X)``::\n\n np.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, a user-provided affinity matrix can be specified by\nsetting ``affinity='precomputed'``.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_clusters : int, default=8\n The dimension of the projection subspace.\n\neigen_solver : {'arpack', 'lobpcg', 'amg'}, default=None\n The eigenvalue decomposition strategy to use. AMG requires pyamg\n to be installed. It can be faster on very large, sparse problems,\n but may also lead to instabilities. If None, then ``'arpack'`` is\n used. See [4]_ for more details regarding `'lobpcg'`.\n\nn_components : int, default=n_clusters\n Number of eigenvectors to use for the spectral embedding.\n\nrandom_state : int, RandomState instance, default=None\n A pseudo random number generator used for the initialization\n of the lobpcg eigenvectors decomposition when `eigen_solver ==\n 'amg'`, and for the K-Means initialization. Use an int to make\n the results deterministic across calls (See\n :term:`Glossary `).\n\n .. note::\n When using `eigen_solver == 'amg'`,\n it is necessary to also fix the global numpy seed with\n `np.random.seed(int)` to get deterministic results. See\n https://github.com/pyamg/pyamg/issues/139 for further\n information.\n\nn_init : int, default=10\n Number of time the k-means algorithm will be run with different\n centroid seeds. The final results will be the best output of n_init\n consecutive runs in terms of inertia. Only used if\n ``assign_labels='kmeans'``.\n\ngamma : float, default=1.0\n Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels.\n Ignored for ``affinity='nearest_neighbors'``.\n\naffinity : str or callable, default='rbf'\n How to construct the affinity matrix.\n - 'nearest_neighbors': construct the affinity matrix by computing a\n graph of nearest neighbors.\n - 'rbf': construct the affinity matrix using a radial basis function\n (RBF) kernel.\n - 'precomputed': interpret ``X`` as a precomputed affinity matrix,\n where larger values indicate greater similarity between instances.\n - 'precomputed_nearest_neighbors': interpret ``X`` as a sparse graph\n of precomputed distances, and construct a binary affinity matrix\n from the ``n_neighbors`` nearest neighbors of each instance.\n - one of the kernels supported by\n :func:`~sklearn.metrics.pairwise_kernels`.\n\n Only kernels that produce similarity scores (non-negative values that\n increase with similarity) should be used. This property is not checked\n by the clustering algorithm.\n\nn_neighbors : int, default=10\n Number of neighbors to use when constructing the affinity matrix using\n the nearest neighbors method. 
Ignored for ``affinity='rbf'``.\n\neigen_tol : float, default=0.0\n Stopping criterion for eigendecomposition of the Laplacian matrix\n when ``eigen_solver='arpack'``.\n\nassign_labels : {'kmeans', 'discretize', 'cluster_qr'}, default='kmeans'\n The strategy for assigning labels in the embedding space. There are two\n ways to assign labels after the Laplacian embedding. k-means is a\n popular choice, but it can be sensitive to initialization.\n Discretization is another approach which is less sensitive to random\n initialization [3]_.\n The cluster_qr method [5]_ directly extract clusters from eigenvectors\n in spectral clustering. In contrast to k-means and discretization, cluster_qr\n has no tuning parameters and runs no iterations, yet may outperform\n k-means and discretization in terms of both quality and speed.\n\n .. versionchanged:: 1.1\n Added new labeling method 'cluster_qr'.\n\ndegree : float, default=3\n Degree of the polynomial kernel. Ignored by other kernels.\n\ncoef0 : float, default=1\n Zero coefficient for polynomial and sigmoid kernels.\n Ignored by other kernels.\n\nkernel_params : dict of str to any, default=None\n Parameters (keyword arguments) and values for kernel passed as\n callable object. Ignored by other kernels.\n\nn_jobs : int, default=None\n The number of parallel jobs to run when `affinity='nearest_neighbors'`\n or `affinity='precomputed_nearest_neighbors'`. The neighbors search\n will be done in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : bool, default=False\n Verbosity mode.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\naffinity_matrix_ : array-like of shape (n_samples, n_samples)\n Affinity matrix used for clustering. Available only after calling\n ``fit``.\n\nlabels_ : ndarray of shape (n_samples,)\n Labels of each point\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.cluster.KMeans : K-Means clustering.\nsklearn.cluster.DBSCAN : Density-Based Spatial Clustering of\n Applications with Noise.\n\nNotes\n-----\nA distance matrix for which 0 indicates identical elements and high values\nindicate very dissimilar elements can be transformed into an affinity /\nsimilarity matrix that is well-suited for the algorithm by\napplying the Gaussian (aka RBF, heat) kernel::\n\n np.exp(- dist_matrix ** 2 / (2. * delta ** 2))\n\nwhere ``delta`` is a free parameter representing the width of the Gaussian\nkernel.\n\nAn alternative is to take a symmetric version of the k-nearest neighbors\nconnectivity matrix of the points.\n\nIf the pyamg package is installed, it is used: this greatly\nspeeds up computation.\n\nReferences\n----------\n.. [1] :doi:`Normalized cuts and image segmentation, 2000\n Jianbo Shi, Jitendra Malik\n <10.1109/34.868688>`\n\n.. [2] :doi:`A Tutorial on Spectral Clustering, 2007\n Ulrike von Luxburg\n <10.1007/s11222-007-9033-z>`\n\n.. [3] `Multiclass spectral clustering, 2003\n Stella X. Yu, Jianbo Shi\n `_\n\n.. [4] `Toward the Optimal Preconditioned Eigensolver:\n Locally Optimal Block Preconditioned Conjugate Gradient Method, 2001.\n A. V. Knyazev\n SIAM Journal on Scientific Computing 23, no. 2, pp. 517-541.\n `_\n\n.. 
[5] :doi:`Simple, direct, and efficient multi-way spectral clustering, 2019\n Anil Damle, Victor Minden, Lexing Ying\n <10.1093/imaiai/iay008>`\n\nExamples\n--------\n>>> from sklearn.cluster import SpectralClustering\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [1, 0],\n... [4, 7], [3, 5], [3, 6]])\n>>> clustering = SpectralClustering(n_clusters=2,\n... assign_labels='discretize',\n... random_state=0).fit(X)\n>>> clustering.labels_\narray([1, 1, 1, 0, 0, 0])\n>>> clustering\nSpectralClustering(assign_labels='discretize', n_clusters=2,\n random_state=0)" }, { @@ -21508,7 +21508,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.compose"], - "description": "Meta-estimator to regress on a transformed target.\n\nUseful for applying a non-linear transformation to the target `y` in\nregression problems. This transformation can be given as a Transformer\nsuch as the :class:`~sklearn.preprocessing.QuantileTransformer` or as a\nfunction and its inverse such as `np.log` and `np.exp`.\n\nThe computation during :meth:`fit` is::\n\nregressor.fit(X, func(y))\n\nor::\n\nregressor.fit(X, transformer.transform(y))\n\nThe computation during :meth:`predict` is::\n\ninverse_func(regressor.predict(X))\n\nor::\n\ntransformer.inverse_transform(regressor.predict(X))\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "description": "Meta-estimator to regress on a transformed target.\n\nUseful for applying a non-linear transformation to the target `y` in\nregression problems. This transformation can be given as a Transformer\nsuch as the :class:`~sklearn.preprocessing.QuantileTransformer` or as a\nfunction and its inverse such as `np.log` and `np.exp`.\n\nThe computation during :meth:`fit` is::\n\n regressor.fit(X, func(y))\n\nor::\n\n regressor.fit(X, transformer.transform(y))\n\nThe computation during :meth:`predict` is::\n\n inverse_func(regressor.predict(X))\n\nor::\n\n transformer.inverse_transform(regressor.predict(X))\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", "docstring": "Meta-estimator to regress on a transformed target.\n\nUseful for applying a non-linear transformation to the target `y` in\nregression problems. This transformation can be given as a Transformer\nsuch as the :class:`~sklearn.preprocessing.QuantileTransformer` or as a\nfunction and its inverse such as `np.log` and `np.exp`.\n\nThe computation during :meth:`fit` is::\n\n regressor.fit(X, func(y))\n\nor::\n\n regressor.fit(X, transformer.transform(y))\n\nThe computation during :meth:`predict` is::\n\n inverse_func(regressor.predict(X))\n\nor::\n\n transformer.inverse_transform(regressor.predict(X))\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\nParameters\n----------\nregressor : object, default=None\n Regressor object such as derived from\n :class:`~sklearn.base.RegressorMixin`. This regressor will\n automatically be cloned each time prior to fitting. If `regressor is\n None`, :class:`~sklearn.linear_model.LinearRegression` is created and used.\n\ntransformer : object, default=None\n Estimator object such as derived from\n :class:`~sklearn.base.TransformerMixin`. Cannot be set at the same time\n as `func` and `inverse_func`. If `transformer is None` as well as\n `func` and `inverse_func`, the transformer will be an identity\n transformer. Note that the transformer will be cloned during fitting.\n Also, the transformer is restricting `y` to be a numpy array.\n\nfunc : function, default=None\n Function to apply to `y` before passing to :meth:`fit`. 
Cannot be set\n at the same time as `transformer`. The function needs to return a\n 2-dimensional array. If `func is None`, the function used will be the\n identity function.\n\ninverse_func : function, default=None\n Function to apply to the prediction of the regressor. Cannot be set at\n the same time as `transformer`. The function needs to return a\n 2-dimensional array. The inverse function is used to return\n predictions to the same space of the original training labels.\n\ncheck_inverse : bool, default=True\n Whether to check that `transform` followed by `inverse_transform`\n or `func` followed by `inverse_func` leads to the original targets.\n\nAttributes\n----------\nregressor_ : object\n Fitted regressor.\n\ntransformer_ : object\n Transformer used in :meth:`fit` and :meth:`predict`.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`. Only defined if the\n underlying regressor exposes such an attribute when fit.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.preprocessing.FunctionTransformer : Construct a transformer from an\n arbitrary callable.\n\nNotes\n-----\nInternally, the target `y` is always converted into a 2-dimensional array\nto be used by scikit-learn transformers. At the time of prediction, the\noutput will be reshaped to a have the same number of dimensions as `y`.\n\nSee :ref:`examples/compose/plot_transformed_target.py\n`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.linear_model import LinearRegression\n>>> from sklearn.compose import TransformedTargetRegressor\n>>> tt = TransformedTargetRegressor(regressor=LinearRegression(),\n... func=np.log, inverse_func=np.exp)\n>>> X = np.arange(4).reshape(-1, 1)\n>>> y = np.exp(2 * X).ravel()\n>>> tt.fit(X, y)\nTransformedTargetRegressor(...)\n>>> tt.score(X, y)\n1.0\n>>> tt.regressor_.coef_\narray([2.])" }, { @@ -21562,7 +21562,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.covariance"], - "description": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\nGraphLasso has been renamed to GraphicalLasso", + "description": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n GraphLasso has been renamed to GraphicalLasso", "docstring": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n GraphLasso has been renamed to GraphicalLasso\n\nParameters\n----------\nalpha : float, default=0.01\n The regularization parameter: the higher alpha, the more\n regularization, the sparser the inverse covariance.\n Range is (0, inf].\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. 
Only\n used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n The maximum number of iterations.\n\nverbose : bool, default=False\n If verbose is True, the objective function and dual gap are\n plotted at each iteration.\n\nassume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated pseudo inverse matrix.\n\nn_iter_ : int\n Number of iterations run.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\ngraphical_lasso : L1-penalized covariance estimator.\nGraphicalLassoCV : Sparse inverse covariance with\n cross-validated choice of the l1 penalty.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import GraphicalLasso\n>>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n... [0.0, 0.4, 0.0, 0.0],\n... [0.2, 0.0, 0.3, 0.1],\n... [0.0, 0.0, 0.1, 0.7]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n... cov=true_cov,\n... size=200)\n>>> cov = GraphicalLasso().fit(X)\n>>> np.around(cov.covariance_, decimals=3)\narray([[0.816, 0.049, 0.218, 0.019],\n [0.049, 0.364, 0.017, 0.034],\n [0.218, 0.017, 0.322, 0.093],\n [0.019, 0.034, 0.093, 0.69 ]])\n>>> np.around(cov.location_, decimals=3)\narray([0.073, 0.04 , 0.038, 0.143])" }, { @@ -21577,7 +21577,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.covariance"], - "description": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\nGraphLassoCV has been renamed to GraphicalLassoCV", + "description": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n GraphLassoCV has been renamed to GraphicalLassoCV", "docstring": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n GraphLassoCV has been renamed to GraphicalLassoCV\n\nParameters\n----------\nalphas : int or array-like of shape (n_alphas,), dtype=float, default=4\n If an integer is given, it fixes the number of points on the\n grids of alpha to be used. If a list is given, it gives the\n grid to be used. See the notes in the class docstring for\n more details. Range is (0, inf] when floats given.\n\nn_refinements : int, default=4\n The number of times the grid is refined. Not used if explicit\n values of alphas are passed. 
Range is [1, inf).\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - integer, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.20\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n Maximum number of iterations.\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where number of features is greater\n than number of samples. Elsewhere prefer cd which is more numerically\n stable.\n\nn_jobs : int, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n .. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None\n\nverbose : bool, default=False\n If verbose is True, the objective function and duality gap are\n printed at each iteration.\n\nassume_centered : bool, default=False\n If True, data are not centered before computation.\n Useful when working with data whose mean is almost, but not exactly\n zero.\n If False, data are centered before computation.\n\nAttributes\n----------\nlocation_ : ndarray of shape (n_features,)\n Estimated location, i.e. the estimated mean.\n\ncovariance_ : ndarray of shape (n_features, n_features)\n Estimated covariance matrix.\n\nprecision_ : ndarray of shape (n_features, n_features)\n Estimated precision matrix (inverse covariance).\n\nalpha_ : float\n Penalization parameter selected.\n\ncv_results_ : dict of ndarrays\n A dict with keys:\n\n alphas : ndarray of shape (n_alphas,)\n All penalization parameters explored.\n\n split(k)_test_score : ndarray of shape (n_alphas,)\n Log-likelihood score on left-out data across (k)th fold.\n\n .. versionadded:: 1.0\n\n mean_test_score : ndarray of shape (n_alphas,)\n Mean of scores over the folds.\n\n .. versionadded:: 1.0\n\n std_test_score : ndarray of shape (n_alphas,)\n Standard deviation of scores over the folds.\n\n .. versionadded:: 1.0\n\n split(k)_score : ndarray of shape (n_alphas,)\n Log-likelihood score on left-out data across (k)th fold.\n\n .. deprecated:: 1.0\n `split(k)_score` is deprecated in 1.0 and will be removed in 1.2.\n Use `split(k)_test_score` instead.\n\n mean_score : ndarray of shape (n_alphas,)\n Mean of scores over the folds.\n\n .. deprecated:: 1.0\n `mean_score` is deprecated in 1.0 and will be removed in 1.2.\n Use `mean_test_score` instead.\n\n std_score : ndarray of shape (n_alphas,)\n Standard deviation of scores over the folds.\n\n .. deprecated:: 1.0\n `std_score` is deprecated in 1.0 and will be removed in 1.2.\n Use `std_test_score` instead.\n\n .. 
versionadded:: 0.24\n\nn_iter_ : int\n Number of iterations run for the optimal alpha.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\ngraphical_lasso : L1-penalized covariance estimator.\nGraphicalLasso : Sparse inverse covariance estimation\n with an l1-penalized estimator.\n\nNotes\n-----\nThe search for the optimal penalization parameter (`alpha`) is done on an\niteratively refined grid: first the cross-validated scores on a grid are\ncomputed, then a new refined grid is centered around the maximum, and so\non.\n\nOne of the challenges which is faced here is that the solvers can\nfail to converge to a well-conditioned estimate. The corresponding\nvalues of `alpha` then come out as missing values, but the optimum may\nbe close to these missing values.\n\nIn `fit`, once the best parameter `alpha` is found through\ncross-validation, the model is fit again using the entire training set.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.covariance import GraphicalLassoCV\n>>> true_cov = np.array([[0.8, 0.0, 0.2, 0.0],\n... [0.0, 0.4, 0.0, 0.0],\n... [0.2, 0.0, 0.3, 0.1],\n... [0.0, 0.0, 0.1, 0.7]])\n>>> np.random.seed(0)\n>>> X = np.random.multivariate_normal(mean=[0, 0, 0, 0],\n... cov=true_cov,\n... size=200)\n>>> cov = GraphicalLassoCV().fit(X)\n>>> np.around(cov.covariance_, decimals=3)\narray([[0.816, 0.051, 0.22 , 0.017],\n [0.051, 0.364, 0.018, 0.036],\n [0.22 , 0.018, 0.322, 0.094],\n [0.017, 0.036, 0.094, 0.69 ]])\n>>> np.around(cov.location_, decimals=3)\narray([0.073, 0.04 , 0.038, 0.143])" }, { @@ -21783,7 +21783,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.decomposition"], - "description": "Dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n(U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n(U,V)\nwith || V_k ||_2 <= 1 for all 0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide `.", + "description": "Dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n (U,V)\n with || V_k ||_2 <= 1 for all 0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide `.", "docstring": "Dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n (U,V)\n with || V_k ||_2 <= 1 for all 0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of dictionary elements to 
extract. If None, then ``n_components``\n is set to ``n_features``.\n\nalpha : float, default=1.0\n Sparsity controlling parameter.\n\nmax_iter : int, default=1000\n Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n Tolerance for numerical error.\n\nfit_algorithm : {'lars', 'cd'}, default='lars'\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (:func:`~sklearn.linear_model.lars_path`);\n * `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be\n faster if the estimated components are sparse.\n\n .. versionadded:: 0.17\n *cd* coordinate descent method to improve speed.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (:func:`~sklearn.linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (:class:`~sklearn.linear_model.Lasso`). `'lasso_lars'`\n will be faster if the estimated components are sparse.\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution.\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\n .. versionadded:: 0.17\n *lasso_cd* coordinate descent method to improve speed.\n\ntransform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and\n `algorithm='omp'`. If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `None`, defaults to `alpha`.\n\nn_jobs : int or None, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ncode_init : ndarray of shape (n_samples, n_components), default=None\n Initial value for the code, for warm restart. Only used if `code_init`\n and `dict_init` are not None.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n Initial values for the dictionary, for warm restart. Only used if\n `code_init` and `dict_init` are not None.\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nsplit_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. This can improve the\n performance of downstream classifiers.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. 
versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n dictionary atoms extracted from the data\n\nerror_ : array\n vector of errors at each iteration\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nn_iter_ : int\n Number of iterations run.\n\nSee Also\n--------\nMiniBatchDictionaryLearning: A faster, less accurate, version of the\n dictionary learning algorithm.\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\nSparseCoder : Find a sparse representation of data from a fixed,\n precomputed dictionary.\nSparsePCA : Sparse Principal Components Analysis.\n\nReferences\n----------\n\nJ. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009: Online dictionary learning\nfor sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_sparse_coded_signal\n>>> from sklearn.decomposition import DictionaryLearning\n>>> X, dictionary, code = make_sparse_coded_signal(\n... n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n... random_state=42, data_transposed=False\n... )\n>>> dict_learner = DictionaryLearning(\n... n_components=15, transform_algorithm='lasso_lars', transform_alpha=0.1,\n... random_state=42,\n... )\n>>> X_transformed = dict_learner.fit_transform(X)\n\nWe can check the level of sparsity of `X_transformed`:\n\n>>> np.mean(X_transformed == 0)\n0.41...\n\nWe can compare the average squared euclidean norm of the reconstruction\nerror of the sparse coded signal relative to the squared euclidean norm of\nthe original signal:\n\n>>> X_hat = X_transformed @ dict_learner.components_\n>>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n0.07..." 
}, { @@ -21809,7 +21809,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.decomposition"], - "description": "Mini-batch dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n(U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n(U,V)\nwith || V_k ||_2 <= 1 for all 0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide `.", + "description": "Mini-batch dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n (U,V)\n with || V_k ||_2 <= 1 for all 0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide `.", "docstring": "Mini-batch dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n (U,V)\n with || V_k ||_2 <= 1 for all 0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of dictionary elements to extract.\n\nalpha : float, default=1\n Sparsity controlling parameter.\n\nn_iter : int, default=1000\n Total number of iterations over data batches to perform.\n\n .. deprecated:: 1.1\n ``n_iter`` is deprecated in 1.1 and will be removed in 1.3. Use\n ``max_iter`` instead.\n\nmax_iter : int, default=None\n Maximum number of iterations over the complete dataset before\n stopping independently of any early stopping criterion heuristics.\n If ``max_iter`` is not None, ``n_iter`` is ignored.\n\n .. versionadded:: 1.1\n\nfit_algorithm : {'lars', 'cd'}, default='lars'\n The algorithm used:\n\n - `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`)\n - `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nbatch_size : int, default=3\n Number of samples in each mini-batch.\n\nshuffle : bool, default=True\n Whether to shuffle the samples before forming batches.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n Initial value of the dictionary for warm restart scenarios.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution.\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). 
`'lasso_lars'` will be faster\n if the estimated components are sparse.\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution.\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\ntransform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and\n `algorithm='omp'`. If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `None`, defaults to `alpha`.\n\nverbose : bool or int, default=False\n To control the verbosity of the procedure.\n\nsplit_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. This can improve the\n performance of downstream classifiers.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\n .. versionadded:: 0.22\n\ncallback : callable, default=None\n A callable that gets invoked at the end of each iteration.\n\n .. versionadded:: 1.1\n\ntol : float, default=1e-3\n Control early stopping based on the norm of the differences in the\n dictionary between 2 steps. Used only if `max_iter` is not None.\n\n To disable early stopping based on changes in the dictionary, set\n `tol` to 0.0.\n\n .. versionadded:: 1.1\n\nmax_no_improvement : int, default=10\n Control early stopping based on the consecutive number of mini batches\n that does not yield an improvement on the smoothed cost function. Used only if\n `max_iter` is not None.\n\n To disable convergence detection based on cost function, set\n `max_no_improvement` to None.\n\n .. versionadded:: 1.1\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Components extracted from the data.\n\ninner_stats_ : tuple of (A, B) ndarrays\n Internal sufficient statistics that are kept by the algorithm.\n Keeping them is useful in online settings, to avoid losing the\n history of the evolution, but they shouldn't have any use for the\n end user.\n `A` `(n_components, n_components)` is the dictionary covariance matrix.\n `B` `(n_features, n_components)` is the data approximation matrix.\n\n .. deprecated:: 1.1\n `inner_stats_` serves internal purpose only and will be removed in 1.3.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. 
versionadded:: 1.0\n\nn_iter_ : int\n Number of iterations over the full dataset.\n\niter_offset_ : int\n The number of iteration on data batches that has been performed before.\n\n .. deprecated:: 1.1\n `iter_offset_` has been renamed `n_steps_` and will be removed in 1.3.\n\nrandom_state_ : RandomState instance\n RandomState instance that is generated either from a seed, the random\n number generattor or by `np.random`.\n\n .. deprecated:: 1.1\n `random_state_` serves internal purpose only and will be removed in 1.3.\n\nn_steps_ : int\n Number of mini-batches processed.\n\n .. versionadded:: 1.1\n\nSee Also\n--------\nDictionaryLearning : Find a dictionary that sparsely encodes data.\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\nSparseCoder : Find a sparse representation of data from a fixed,\n precomputed dictionary.\nSparsePCA : Sparse Principal Components Analysis.\n\nReferences\n----------\n\nJ. Mairal, F. Bach, J. Ponce, G. Sapiro, 2009: Online dictionary learning\nfor sparse coding (https://www.di.ens.fr/sierra/pdfs/icml09.pdf)\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.datasets import make_sparse_coded_signal\n>>> from sklearn.decomposition import MiniBatchDictionaryLearning\n>>> X, dictionary, code = make_sparse_coded_signal(\n... n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10,\n... random_state=42, data_transposed=False)\n>>> dict_learner = MiniBatchDictionaryLearning(\n... n_components=15, batch_size=3, transform_algorithm='lasso_lars',\n... transform_alpha=0.1, random_state=42)\n>>> X_transformed = dict_learner.fit_transform(X)\n\nWe can check the level of sparsity of `X_transformed`:\n\n>>> np.mean(X_transformed == 0)\n0.38...\n\nWe can compare the average squared euclidean norm of the reconstruction\nerror of the sparse coded signal relative to the squared euclidean norm of\nthe original signal:\n\n>>> X_hat = X_transformed @ dict_learner.components_\n>>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1))\n0.059..." }, { @@ -21829,7 +21829,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.decomposition"], - "description": "Sparse coding.\n\nFinds a sparse representation of data against a fixed, precomputed\ndictionary.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\nX ~= code * dictionary\n\nRead more in the :ref:`User Guide `.", + "description": "Sparse coding.\n\nFinds a sparse representation of data against a fixed, precomputed\ndictionary.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\nRead more in the :ref:`User Guide `.", "docstring": "Sparse coding.\n\nFinds a sparse representation of data against a fixed, precomputed\ndictionary.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndictionary : ndarray of shape (n_components, n_features)\n The dictionary atoms used for sparse coding. 
Lines are assumed to be\n normalized to unit norm.\n\ntransform_algorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='omp'\n Algorithm used to transform the data:\n\n - `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n - `'lasso_lars'`: uses Lars to compute the Lasso solution;\n - `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if\n the estimated components are sparse;\n - `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n - `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\ntransform_n_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. If `None`, then\n `transform_n_nonzero_coefs=int(n_features / 10)`.\n\ntransform_alpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.\n\nsplit_sign : bool, default=False\n Whether to split the sparse feature vector into the concatenation of\n its negative part and its positive part. This can improve the\n performance of downstream classifiers.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\ntransform_max_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `lasso_lars`.\n\n .. versionadded:: 0.22\n\nAttributes\n----------\nn_components_ : int\n Number of atoms.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nDictionaryLearning : Find a dictionary that sparsely encodes data.\nMiniBatchDictionaryLearning : A faster, less accurate, version of the\n dictionary learning algorithm.\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\nSparsePCA : Mini-batch Sparse Principal Components Analysis.\nsparse_encode : Sparse coding where each row of the result is the solution\n to a sparse coding problem.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.decomposition import SparseCoder\n>>> X = np.array([[-1, -1, -1], [0, 0, 3]])\n>>> dictionary = np.array(\n... [[0, 1, 0],\n... [-1, -1, 2],\n... [1, 1, 1],\n... [0, 1, 1],\n... [0, 2, 1]],\n... dtype=np.float64\n... )\n>>> coder = SparseCoder(\n... dictionary=dictionary, transform_algorithm='lasso_lars',\n... transform_alpha=1e-10,\n... 
)\n>>> coder.transform(X)\narray([[ 0., 0., -1., 0., 0.],\n [ 0., 1., 1., 0., 0.]])" }, { @@ -21981,7 +21981,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.decomposition"], - "description": "Mini-Batch Non-Negative Matrix Factorization (NMF).\n\n.. versionadded:: 1.1\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements,\n(`W`, `H`) whose product approximates the non-negative matrix `X`. This\nfactorization can be used for example for dimensionality reduction, source\nseparation or topic extraction.\n\nThe objective function is:\n\n.. math::\n\nL(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n&+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n&+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n&+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n&+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe objective function is minimized with an alternating minimization of `W`\nand `H`.\n\nNote that the transformed data is named `W` and the components matrix is\nnamed `H`. In the NMF literature, the naming convention is usually the opposite\nsince the data matrix `X` is transposed.\n\nRead more in the :ref:`User Guide `.", + "description": "Mini-Batch Non-Negative Matrix Factorization (NMF).\n\n.. versionadded:: 1.1\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements,\n(`W`, `H`) whose product approximates the non-negative matrix `X`. This\nfactorization can be used for example for dimensionality reduction, source\nseparation or topic extraction.\n\nThe objective function is:\n\n .. math::\n\n L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe objective function is minimized with an alternating minimization of `W`\nand `H`.\n\nNote that the transformed data is named `W` and the components matrix is\nnamed `H`. In the NMF literature, the naming convention is usually the opposite\nsince the data matrix `X` is transposed.\n\nRead more in the :ref:`User Guide `.", "docstring": "Mini-Batch Non-Negative Matrix Factorization (NMF).\n\n.. versionadded:: 1.1\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements,\n(`W`, `H`) whose product approximates the non-negative matrix `X`. This\nfactorization can be used for example for dimensionality reduction, source\nseparation or topic extraction.\n\nThe objective function is:\n\n .. 
math::\n\n L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe objective function is minimized with an alternating minimization of `W`\nand `H`.\n\nNote that the transformed data is named `W` and the components matrix is\nnamed `H`. In the NMF literature, the naming convention is usually the opposite\nsince the data matrix `X` is transposed.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of components, if `n_components` is not set all features\n are kept.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n Method used to initialize the procedure.\n Valid options:\n\n - `None`: 'nndsvda' if `n_components <= min(n_samples, n_features)`,\n otherwise random.\n\n - `'random'`: non-negative random matrices, scaled with:\n `sqrt(X.mean() / n_components)`\n\n - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness).\n\n - `'nndsvda'`: NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired).\n\n - `'nndsvdar'` NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired).\n\n - `'custom'`: use custom matrices `W` and `H`\n\nbatch_size : int, default=1024\n Number of samples in each mini-batch. Large batch sizes\n give better long-term convergence at the cost of a slower start.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'\n Beta divergence to be minimized, measuring the distance between `X`\n and the dot product `WH`. Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for `beta_loss <= 0` (or 'itakura-saito'), the input\n matrix `X` cannot contain zeros.\n\ntol : float, default=1e-4\n Control early stopping based on the norm of the differences in `H`\n between 2 steps. To disable early stopping based on changes in `H`, set\n `tol` to 0.0.\n\nmax_no_improvement : int, default=10\n Control early stopping based on the consecutive number of mini batches\n that does not yield an improvement on the smoothed cost function.\n To disable convergence detection based on cost function, set\n `max_no_improvement` to None.\n\nmax_iter : int, default=200\n Maximum number of iterations over the complete dataset before\n timing out.\n\nalpha_W : float, default=0.0\n Constant that multiplies the regularization terms of `W`. Set it to zero\n (default) to have no regularization on `W`.\n\nalpha_H : float or \"same\", default=\"same\"\n Constant that multiplies the regularization terms of `H`. Set it to zero to\n have no regularization on `H`. 
If \"same\" (default), it takes the same value as\n `alpha_W`.\n\nl1_ratio : float, default=0.0\n The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n For l1_ratio = 0 the penalty is an elementwise L2 penalty\n (aka Frobenius Norm).\n For l1_ratio = 1 it is an elementwise L1 penalty.\n For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\nforget_factor : float, default=0.7\n Amount of rescaling of past information. Its value could be 1 with\n finite datasets. Choosing values < 1 is recommended with online\n learning as more recent batches will weight more than past batches.\n\nfresh_restarts : bool, default=False\n Whether to completely solve for W at each step. Doing fresh restarts will likely\n lead to a better solution for a same number of iterations but it is much slower.\n\nfresh_restarts_max_iter : int, default=30\n Maximum number of iterations when solving for W at each step. Only used when\n doing fresh restarts. These iterations may be stopped early based on a small\n change of W controlled by `tol`.\n\ntransform_max_iter : int, default=None\n Maximum number of iterations when solving for W at transform time.\n If None, it defaults to `max_iter`.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initialisation (when ``init`` == 'nndsvdar' or\n 'random'), and in Coordinate Descent. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nverbose : bool, default=False\n Whether to be verbose.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Factorization matrix, sometimes called 'dictionary'.\n\nn_components_ : int\n The number of components. It is same as the `n_components` parameter\n if it was given. Otherwise, it will be same as the number of\n features.\n\nreconstruction_err_ : float\n Frobenius norm of the matrix difference, or beta-divergence, between\n the training data `X` and the reconstructed data `WH` from\n the fitted model.\n\nn_iter_ : int\n Actual number of started iterations over the whole dataset.\n\nn_steps_ : int\n Number of mini-batches processed.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\nSee Also\n--------\nNMF : Non-negative matrix factorization.\nMiniBatchDictionaryLearning : Finds a dictionary that can best be used to represent\n data using a sparse code.\n\nReferences\n----------\n.. [1] :doi:`\"Fast local algorithms for large scale nonnegative matrix and tensor\n factorizations\" <10.1587/transfun.E92.A.708>`\n Cichocki, Andrzej, and P. H. A. N. Anh-Huy. IEICE transactions on fundamentals\n of electronics, communications and computer sciences 92.3: 708-721, 2009.\n\n.. [2] :doi:`\"Algorithms for nonnegative matrix factorization with the\n beta-divergence\" <10.1162/NECO_a_00168>`\n Fevotte, C., & Idier, J. (2011). Neural Computation, 23(9).\n\n.. [3] :doi:`\"Online algorithms for nonnegative matrix factorization with the\n Itakura-Saito divergence\" <10.1109/ASPAA.2011.6082314>`\n Lefevre, A., Bach, F., Fevotte, C. (2011). 
WASPA.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n>>> from sklearn.decomposition import MiniBatchNMF\n>>> model = MiniBatchNMF(n_components=2, init='random', random_state=0)\n>>> W = model.fit_transform(X)\n>>> H = model.components_" }, { @@ -22005,7 +22005,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.decomposition"], - "description": "Non-Negative Matrix Factorization (NMF).\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements, (W, H)\nwhose product approximates the non-negative matrix X. This factorization can be used\nfor example for dimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n.. math::\n\nL(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n&+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n&+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n&+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n&+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H.\n\nNote that the transformed data is named W and the components matrix is named H. In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.\n\nRead more in the :ref:`User Guide `.", + "description": "Non-Negative Matrix Factorization (NMF).\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements, (W, H)\nwhose product approximates the non-negative matrix X. This factorization can be used\nfor example for dimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n .. math::\n\n L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H.\n\nNote that the transformed data is named W and the components matrix is named H. 
In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.\n\nRead more in the :ref:`User Guide `.", "docstring": "Non-Negative Matrix Factorization (NMF).\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements, (W, H)\nwhose product approximates the non-negative matrix X. This factorization can be used\nfor example for dimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n .. math::\n\n L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H.\n\nNote that the transformed data is named W and the components matrix is named H. In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int, default=None\n Number of components, if n_components is not set all features\n are kept.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n Method used to initialize the procedure.\n Default: None.\n Valid options:\n\n - `None`: 'nndsvda' if n_components <= min(n_samples, n_features),\n otherwise random.\n\n - `'random'`: non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n - `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n - `'nndsvda'`: NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n - `'nndsvdar'` NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n - `'custom'`: use custom matrices W and H\n\n .. versionchanged:: 1.1\n When `init=None` and n_components is less than n_samples and n_features\n defaults to `nndsvda` instead of `nndsvd`.\n\nsolver : {'cd', 'mu'}, default='cd'\n Numerical solver to use:\n 'cd' is a Coordinate Descent solver.\n 'mu' is a Multiplicative Update solver.\n\n .. versionadded:: 0.17\n Coordinate Descent solver.\n\n .. versionadded:: 0.19\n Multiplicative Update solver.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'\n Beta divergence to be minimized, measuring the distance between X\n and the dot product WH. Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n matrix X cannot contain zeros. Used only in 'mu' solver.\n\n .. 
versionadded:: 0.19\n\ntol : float, default=1e-4\n Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n Maximum number of iterations before timing out.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initialisation (when ``init`` == 'nndsvdar' or\n 'random'), and in Coordinate Descent. Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nalpha : float, default=0.0\n Constant that multiplies the regularization terms. Set it to zero to\n have no regularization. When using `alpha` instead of `alpha_W` and `alpha_H`,\n the regularization terms are not scaled by the `n_features` (resp. `n_samples`)\n factors for `W` (resp. `H`).\n\n .. versionadded:: 0.17\n *alpha* used in the Coordinate Descent solver.\n\n .. deprecated:: 1.0\n The `alpha` parameter is deprecated in 1.0 and will be removed in 1.2.\n Use `alpha_W` and `alpha_H` instead.\n\nalpha_W : float, default=0.0\n Constant that multiplies the regularization terms of `W`. Set it to zero\n (default) to have no regularization on `W`.\n\n .. versionadded:: 1.0\n\nalpha_H : float or \"same\", default=\"same\"\n Constant that multiplies the regularization terms of `H`. Set it to zero to\n have no regularization on `H`. If \"same\" (default), it takes the same value as\n `alpha_W`.\n\n .. versionadded:: 1.0\n\nl1_ratio : float, default=0.0\n The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n For l1_ratio = 0 the penalty is an elementwise L2 penalty\n (aka Frobenius Norm).\n For l1_ratio = 1 it is an elementwise L1 penalty.\n For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n .. versionadded:: 0.17\n Regularization parameter *l1_ratio* used in the Coordinate Descent\n solver.\n\nverbose : int, default=0\n Whether to be verbose.\n\nshuffle : bool, default=False\n If true, randomize the order of coordinates in the CD solver.\n\n .. versionadded:: 0.17\n *shuffle* parameter used in the Coordinate Descent solver.\n\nregularization : {'both', 'components', 'transformation', None}, default='both'\n Select whether the regularization affects the components (H), the\n transformation (W), both or none of them.\n\n .. versionadded:: 0.24\n\n .. deprecated:: 1.0\n The `regularization` parameter is deprecated in 1.0 and will be removed in\n 1.2. Use `alpha_W` and `alpha_H` instead.\n\nAttributes\n----------\ncomponents_ : ndarray of shape (n_components, n_features)\n Factorization matrix, sometimes called 'dictionary'.\n\nn_components_ : int\n The number of components. It is same as the `n_components` parameter\n if it was given. Otherwise, it will be same as the number of\n features.\n\nreconstruction_err_ : float\n Frobenius norm of the matrix difference, or beta-divergence, between\n the training data ``X`` and the reconstructed data ``WH`` from\n the fitted model.\n\nn_iter_ : int\n Actual number of iterations.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. 
versionadded:: 1.0\n\nSee Also\n--------\nDictionaryLearning : Find a dictionary that sparsely encodes data.\nMiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.\nPCA : Principal component analysis.\nSparseCoder : Find a sparse representation of data from a fixed,\n precomputed dictionary.\nSparsePCA : Sparse Principal Components Analysis.\nTruncatedSVD : Dimensionality reduction using truncated SVD.\n\nReferences\n----------\n.. [1] :doi:`\"Fast local algorithms for large scale nonnegative matrix and tensor\n factorizations\" <10.1587/transfun.E92.A.708>`\n Cichocki, Andrzej, and P. H. A. N. Anh-Huy. IEICE transactions on fundamentals\n of electronics, communications and computer sciences 92.3: 708-721, 2009.\n\n.. [2] :doi:`\"Algorithms for nonnegative matrix factorization with the\n beta-divergence\" <10.1162/NECO_a_00168>`\n Fevotte, C., & Idier, J. (2011). Neural Computation, 23(9).\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n>>> from sklearn.decomposition import NMF\n>>> model = NMF(n_components=2, init='random', random_state=0)\n>>> W = model.fit_transform(X)\n>>> H = model.components_" }, { @@ -22107,7 +22107,7 @@ ], "is_public": true, "reexported_by": [], - "description": "Linear Discriminant Analysis.\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. versionadded:: 0.17\n*LinearDiscriminantAnalysis*.\n\nRead more in the :ref:`User Guide `.", + "description": "Linear Discriminant Analysis.\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. versionadded:: 0.17\n *LinearDiscriminantAnalysis*.\n\nRead more in the :ref:`User Guide `.", "docstring": "Linear Discriminant Analysis.\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. 
versionadded:: 0.17\n *LinearDiscriminantAnalysis*.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nsolver : {'svd', 'lsqr', 'eigen'}, default='svd'\n Solver to use, possible values:\n - 'svd': Singular value decomposition (default).\n Does not compute the covariance matrix, therefore this solver is\n recommended for data with a large number of features.\n - 'lsqr': Least squares solution.\n Can be combined with shrinkage or custom covariance estimator.\n - 'eigen': Eigenvalue decomposition.\n Can be combined with shrinkage or custom covariance estimator.\n\nshrinkage : 'auto' or float, default=None\n Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n This should be left to None if `covariance_estimator` is used.\n Note that shrinkage works only with 'lsqr' and 'eigen' solvers.\n\npriors : array-like of shape (n_classes,), default=None\n The class prior probabilities. By default, the class proportions are\n inferred from the training data.\n\nn_components : int, default=None\n Number of components (<= min(n_classes - 1, n_features)) for\n dimensionality reduction. If None, will be set to\n min(n_classes - 1, n_features). This parameter only affects the\n `transform` method.\n\nstore_covariance : bool, default=False\n If True, explicitly compute the weighted within-class covariance\n matrix when solver is 'svd'. The matrix is always computed\n and stored for the other solvers.\n\n .. versionadded:: 0.17\n\ntol : float, default=1.0e-4\n Absolute threshold for a singular value of X to be considered\n significant, used to estimate the rank of X. Dimensions whose\n singular values are non-significant are discarded. Only used if\n solver is 'svd'.\n\n .. versionadded:: 0.17\n\ncovariance_estimator : covariance estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in :mod:`sklearn.covariance`.\n if None the shrinkage parameter drives the estimate.\n\n This should be left to None if `shrinkage` is used.\n Note that `covariance_estimator` works only with 'lsqr' and 'eigen'\n solvers.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_classes, n_features)\n Weight vector(s).\n\nintercept_ : ndarray of shape (n_classes,)\n Intercept term.\n\ncovariance_ : array-like of shape (n_features, n_features)\n Weighted within-class covariance matrix. It corresponds to\n `sum_k prior_k * C_k` where `C_k` is the covariance matrix of the\n samples in class `k`. The `C_k` are estimated using the (potentially\n shrunk) biased estimator of covariance. If solver is 'svd', only\n exists when `store_covariance` is True.\n\nexplained_variance_ratio_ : ndarray of shape (n_components,)\n Percentage of variance explained by each of the selected components.\n If ``n_components`` is not set then all components are stored and the\n sum of explained variances is equal to 1.0. 
Only available when eigen\n or svd solver is used.\n\nmeans_ : array-like of shape (n_classes, n_features)\n Class-wise means.\n\npriors_ : array-like of shape (n_classes,)\n Class priors (sum to 1).\n\nscalings_ : array-like of shape (rank, n_classes - 1)\n Scaling of the features in the space spanned by the class centroids.\n Only available for 'svd' and 'eigen' solvers.\n\nxbar_ : array-like of shape (n_features,)\n Overall mean. Only present if solver is 'svd'.\n\nclasses_ : array-like of shape (n_classes,)\n Unique class labels.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nQuadraticDiscriminantAnalysis : Quadratic Discriminant Analysis.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = LinearDiscriminantAnalysis()\n>>> clf.fit(X, y)\nLinearDiscriminantAnalysis()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]" }, { @@ -22127,7 +22127,7 @@ ], "is_public": true, "reexported_by": [], - "description": "Quadratic Discriminant Analysis.\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. versionadded:: 0.17\n*QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide `.", + "description": "Quadratic Discriminant Analysis.\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. versionadded:: 0.17\n *QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide `.", "docstring": "Quadratic Discriminant Analysis.\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. versionadded:: 0.17\n *QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npriors : ndarray of shape (n_classes,), default=None\n Class priors. By default, the class proportions are inferred from the\n training data.\n\nreg_param : float, default=0.0\n Regularizes the per-class covariance estimates by transforming S2 as\n ``S2 = (1 - reg_param) * S2 + reg_param * np.eye(n_features)``,\n where S2 corresponds to the `scaling_` attribute of a given class.\n\nstore_covariance : bool, default=False\n If True, the class covariance matrices are explicitly computed and\n stored in the `self.covariance_` attribute.\n\n .. versionadded:: 0.17\n\ntol : float, default=1.0e-4\n Absolute threshold for a singular value to be considered significant,\n used to estimate the rank of `Xk` where `Xk` is the centered matrix\n of samples in class k. This parameter does not affect the\n predictions. It only controls a warning that is raised when features\n are considered to be colinear.\n\n .. versionadded:: 0.17\n\nAttributes\n----------\ncovariance_ : list of len n_classes of ndarray of shape (n_features, n_features)\n For each class, gives the covariance matrix estimated using the\n samples of that class. 
The estimations are unbiased. Only present if\n `store_covariance` is True.\n\nmeans_ : array-like of shape (n_classes, n_features)\n Class-wise means.\n\npriors_ : array-like of shape (n_classes,)\n Class priors (sum to 1).\n\nrotations_ : list of len n_classes of ndarray of shape (n_features, n_k)\n For each class k an array of shape (n_features, n_k), where\n ``n_k = min(n_features, number of elements in class k)``\n It is the rotation of the Gaussian distribution, i.e. its\n principal axis. It corresponds to `V`, the matrix of eigenvectors\n coming from the SVD of `Xk = U S Vt` where `Xk` is the centered\n matrix of samples from class k.\n\nscalings_ : list of len n_classes of ndarray of shape (n_k,)\n For each class, contains the scaling of\n the Gaussian distributions along its principal axes, i.e. the\n variance in the rotated coordinate system. It corresponds to `S^2 /\n (n_samples - 1)`, where `S` is the diagonal matrix of singular values\n from the SVD of `Xk`, where `Xk` is the centered matrix of samples\n from class k.\n\nclasses_ : ndarray of shape (n_classes,)\n Unique class labels.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nLinearDiscriminantAnalysis : Linear Discriminant Analysis.\n\nExamples\n--------\n>>> from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> y = np.array([1, 1, 1, 2, 2, 2])\n>>> clf = QuadraticDiscriminantAnalysis()\n>>> clf.fit(X, y)\nQuadraticDiscriminantAnalysis()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]" }, { @@ -23040,7 +23040,7 @@ "methods": [], "is_public": true, "reexported_by": [], - "description": "Custom warning to capture convergence problems\n\n.. versionchanged:: 0.18\nMoved from sklearn.utils.", + "description": "Custom warning to capture convergence problems\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.", "docstring": "Custom warning to capture convergence problems\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils." }, { @@ -23052,7 +23052,7 @@ "methods": [], "is_public": true, "reexported_by": [], - "description": "Warning used to notify implicit data conversions happening in the code.\n\nThis warning occurs when some input data needs to be converted or\ninterpreted in a way that may not match the user's expectations.\n\nFor example, this warning may occur when the user\n- passes an integer array to a function which expects float input and\nwill convert the input\n- requests a non-copying operation, but a copy is required to meet the\nimplementation's data-type expectations;\n- passes an input whose shape can be interpreted ambiguously.\n\n.. 
versionchanged:: 0.18\nMoved from sklearn.utils.validation.", + "description": "Warning used to notify implicit data conversions happening in the code.\n\nThis warning occurs when some input data needs to be converted or\ninterpreted in a way that may not match the user's expectations.\n\nFor example, this warning may occur when the user\n - passes an integer array to a function which expects float input and\n will convert the input\n - requests a non-copying operation, but a copy is required to meet the\n implementation's data-type expectations;\n - passes an input whose shape can be interpreted ambiguously.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation.", "docstring": "Warning used to notify implicit data conversions happening in the code.\n\nThis warning occurs when some input data needs to be converted or\ninterpreted in a way that may not match the user's expectations.\n\nFor example, this warning may occur when the user\n - passes an integer array to a function which expects float input and\n will convert the input\n - requests a non-copying operation, but a copy is required to meet the\n implementation's data-type expectations;\n - passes an input whose shape can be interpreted ambiguously.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.validation." }, { @@ -23064,7 +23064,7 @@ "methods": [], "is_public": true, "reexported_by": [], - "description": "Custom warning to notify potential issues with data dimensionality.\n\nFor example, in random projection, this warning is raised when the\nnumber of components, which quantifies the dimensionality of the target\nprojection space, is higher than the number of features, which quantifies\nthe dimensionality of the original source space, to imply that the\ndimensionality of the problem will not be reduced.\n\n.. versionchanged:: 0.18\nMoved from sklearn.utils.", + "description": "Custom warning to notify potential issues with data dimensionality.\n\nFor example, in random projection, this warning is raised when the\nnumber of components, which quantifies the dimensionality of the target\nprojection space, is higher than the number of features, which quantifies\nthe dimensionality of the original source space, to imply that the\ndimensionality of the problem will not be reduced.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils.", "docstring": "Custom warning to notify potential issues with data dimensionality.\n\nFor example, in random projection, this warning is raised when the\nnumber of components, which quantifies the dimensionality of the target\nprojection space, is higher than the number of features, which quantifies\nthe dimensionality of the original source space, to imply that the\ndimensionality of the problem will not be reduced.\n\n.. versionchanged:: 0.18\n Moved from sklearn.utils." }, { @@ -23088,7 +23088,7 @@ "methods": [], "is_public": true, "reexported_by": [], - "description": "Warning class used if there is an error while fitting the estimator.\n\nThis Warning is used in meta estimators GridSearchCV and RandomizedSearchCV\nand the cross-validation helper function cross_val_score to warn when there\nis an error while fitting the estimator.\n\n.. 
versionchanged:: 0.18\nMoved from sklearn.cross_validation.", + "description": "Warning class used if there is an error while fitting the estimator.\n\nThis Warning is used in meta estimators GridSearchCV and RandomizedSearchCV\nand the cross-validation helper function cross_val_score to warn when there\nis an error while fitting the estimator.\n\n.. versionchanged:: 0.18\n Moved from sklearn.cross_validation.", "docstring": "Warning class used if there is an error while fitting the estimator.\n\nThis Warning is used in meta estimators GridSearchCV and RandomizedSearchCV\nand the cross-validation helper function cross_val_score to warn when there\nis an error while fitting the estimator.\n\n.. versionchanged:: 0.18\n Moved from sklearn.cross_validation." }, { @@ -23136,7 +23136,7 @@ "methods": [], "is_public": true, "reexported_by": [], - "description": "Warning used when the metric is invalid\n\n.. versionchanged:: 0.18\nMoved from sklearn.base.", + "description": "Warning used when the metric is invalid\n\n.. versionchanged:: 0.18\n Moved from sklearn.base.", "docstring": "Warning used when the metric is invalid\n\n.. versionchanged:: 0.18\n Moved from sklearn.base." }, { @@ -23684,7 +23684,7 @@ ], "is_public": true, "reexported_by": [], - "description": "Convert a collection of text documents to a matrix of token occurrences.\n\nIt turns a collection of text documents into a scipy.sparse matrix holding\ntoken occurrence counts (or binary occurrence information), possibly\nnormalized as token frequencies if norm='l1' or projected on the euclidean\nunit sphere if norm='l2'.\n\nThis text vectorizer implementation uses the hashing trick to find the\ntoken string name to feature integer index mapping.\n\nThis strategy has several advantages:\n\n- it is very low memory scalable to large datasets as there is no need to\nstore a vocabulary dictionary in memory.\n\n- it is fast to pickle and un-pickle as it holds no state besides the\nconstructor parameters.\n\n- it can be used in a streaming (partial fit) or parallel pipeline as there\nis no state computed during fit.\n\nThere are also a couple of cons (vs using a CountVectorizer with an\nin-memory vocabulary):\n\n- there is no way to compute the inverse transform (from feature indices to\nstring feature names) which can be a problem when trying to introspect\nwhich features are most important to a model.\n\n- there can be collisions: distinct tokens can be mapped to the same\nfeature index. However in practice this is rarely an issue if n_features\nis large enough (e.g. 
2 ** 18 for text classification problems).\n\n- no IDF weighting as this would render the transformer stateful.\n\nThe hash function employed is the signed 32-bit version of Murmurhash3.\n\nRead more in the :ref:`User Guide `.", + "description": "Convert a collection of text documents to a matrix of token occurrences.\n\nIt turns a collection of text documents into a scipy.sparse matrix holding\ntoken occurrence counts (or binary occurrence information), possibly\nnormalized as token frequencies if norm='l1' or projected on the euclidean\nunit sphere if norm='l2'.\n\nThis text vectorizer implementation uses the hashing trick to find the\ntoken string name to feature integer index mapping.\n\nThis strategy has several advantages:\n\n- it is very low memory scalable to large datasets as there is no need to\n store a vocabulary dictionary in memory.\n\n- it is fast to pickle and un-pickle as it holds no state besides the\n constructor parameters.\n\n- it can be used in a streaming (partial fit) or parallel pipeline as there\n is no state computed during fit.\n\nThere are also a couple of cons (vs using a CountVectorizer with an\nin-memory vocabulary):\n\n- there is no way to compute the inverse transform (from feature indices to\n string feature names) which can be a problem when trying to introspect\n which features are most important to a model.\n\n- there can be collisions: distinct tokens can be mapped to the same\n feature index. However in practice this is rarely an issue if n_features\n is large enough (e.g. 2 ** 18 for text classification problems).\n\n- no IDF weighting as this would render the transformer stateful.\n\nThe hash function employed is the signed 32-bit version of Murmurhash3.\n\nRead more in the :ref:`User Guide `.", "docstring": "Convert a collection of text documents to a matrix of token occurrences.\n\nIt turns a collection of text documents into a scipy.sparse matrix holding\ntoken occurrence counts (or binary occurrence information), possibly\nnormalized as token frequencies if norm='l1' or projected on the euclidean\nunit sphere if norm='l2'.\n\nThis text vectorizer implementation uses the hashing trick to find the\ntoken string name to feature integer index mapping.\n\nThis strategy has several advantages:\n\n- it is very low memory scalable to large datasets as there is no need to\n store a vocabulary dictionary in memory.\n\n- it is fast to pickle and un-pickle as it holds no state besides the\n constructor parameters.\n\n- it can be used in a streaming (partial fit) or parallel pipeline as there\n is no state computed during fit.\n\nThere are also a couple of cons (vs using a CountVectorizer with an\nin-memory vocabulary):\n\n- there is no way to compute the inverse transform (from feature indices to\n string feature names) which can be a problem when trying to introspect\n which features are most important to a model.\n\n- there can be collisions: distinct tokens can be mapped to the same\n feature index. However in practice this is rarely an issue if n_features\n is large enough (e.g. 
2 ** 18 for text classification problems).\n\n- no IDF weighting as this would render the transformer stateful.\n\nThe hash function employed is the signed 32-bit version of Murmurhash3.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ninput : {'filename', 'file', 'content'}, default='content'\n - If `'filename'`, the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n - If `'file'`, the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n - If `'content'`, the input is expected to be a sequence of items that\n can be of type string or byte.\n\nencoding : str, default='utf-8'\n If bytes or files are given to analyze, this encoding is used to\n decode.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. By default, it is\n 'strict', meaning that a UnicodeDecodeError will be raised. Other\n values are 'ignore' and 'replace'.\n\nstrip_accents : {'ascii', 'unicode'}, default=None\n Remove accents and perform other character normalization\n during the preprocessing step.\n 'ascii' is a fast method that only works on characters that have\n a direct ASCII mapping.\n 'unicode' is a slightly slower method that works on any characters.\n None (default) does nothing.\n\n Both 'ascii' and 'unicode' use NFKD normalization from\n :func:`unicodedata.normalize`.\n\nlowercase : bool, default=True\n Convert all characters to lowercase before tokenizing.\n\npreprocessor : callable, default=None\n Override the preprocessing (string transformation) stage while\n preserving the tokenizing and n-grams generation steps.\n Only applies if ``analyzer`` is not callable.\n\ntokenizer : callable, default=None\n Override the string tokenization step while preserving the\n preprocessing and n-grams generation steps.\n Only applies if ``analyzer == 'word'``.\n\nstop_words : {'english'}, list, default=None\n If 'english', a built-in stop word list for English is used.\n There are several known issues with 'english' and you should\n consider an alternative (see :ref:`stop_words`).\n\n If a list, that list is assumed to contain stop words, all of which\n will be removed from the resulting tokens.\n Only applies if ``analyzer == 'word'``.\n\ntoken_pattern : str, default=r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"\n Regular expression denoting what constitutes a \"token\", only used\n if ``analyzer == 'word'``. The default regexp selects tokens of 2\n or more alphanumeric characters (punctuation is completely ignored\n and always treated as a token separator).\n\n If there is a capturing group in token_pattern then the\n captured group content, not the entire match, becomes the token.\n At most one capturing group is permitted.\n\nngram_range : tuple (min_n, max_n), default=(1, 1)\n The lower and upper boundary of the range of n-values for different\n n-grams to be extracted. All values of n such that min_n <= n <= max_n\n will be used. 
For example an ``ngram_range`` of ``(1, 1)`` means only\n unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\n only bigrams.\n Only applies if ``analyzer`` is not callable.\n\nanalyzer : {'word', 'char', 'char_wb'} or callable, default='word'\n Whether the feature should be made of word or character n-grams.\n Option 'char_wb' creates character n-grams only from text inside\n word boundaries; n-grams at the edges of words are padded with space.\n\n If a callable is passed it is used to extract the sequence of features\n out of the raw, unprocessed input.\n\n .. versionchanged:: 0.21\n Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n is first read from the file and then passed to the given callable\n analyzer.\n\nn_features : int, default=(2 ** 20)\n The number of features (columns) in the output matrices. Small numbers\n of features are likely to cause hash collisions, but large numbers\n will cause larger coefficient dimensions in linear learners.\n\nbinary : bool, default=False\n If True, all non zero counts are set to 1. This is useful for discrete\n probabilistic models that model binary events rather than integer\n counts.\n\nnorm : {'l1', 'l2'}, default='l2'\n Norm used to normalize term vectors. None for no normalization.\n\nalternate_sign : bool, default=True\n When True, an alternating sign is added to the features as to\n approximately conserve the inner product in the hashed space even for\n small n_features. This approach is similar to sparse random projection.\n\n .. versionadded:: 0.19\n\ndtype : type, default=np.float64\n Type of the matrix returned by fit_transform() or transform().\n\nSee Also\n--------\nCountVectorizer : Convert a collection of text documents to a matrix of\n token counts.\nTfidfVectorizer : Convert a collection of raw documents to a matrix of\n TF-IDF features.\n\nExamples\n--------\n>>> from sklearn.feature_extraction.text import HashingVectorizer\n>>> corpus = [\n... 'This is the first document.',\n... 'This document is the second document.',\n... 'And this is the third one.',\n... 'Is this the first document?',\n... ]\n>>> vectorizer = HashingVectorizer(n_features=2**4)\n>>> X = vectorizer.fit_transform(corpus)\n>>> print(X.shape)\n(4, 16)" }, { @@ -24037,7 +24037,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.gaussian_process"], - "description": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of [1]_.\n\nIn addition to standard scikit-learn estimator API,\n:class:`GaussianProcessRegressor`:\n\n* allows prediction without prior fitting (based on the GP prior)\n* provides an additional method `sample_y(X)`, which evaluates samples\ndrawn from the GPR (prior or posterior) at given inputs\n* exposes a method `log_marginal_likelihood(theta)`, which can be used\nexternally for other ways of selecting hyperparameters, e.g., via\nMarkov chain Monte Carlo.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18", + "description": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of [1]_.\n\nIn addition to standard scikit-learn estimator API,\n:class:`GaussianProcessRegressor`:\n\n * allows prediction without prior fitting (based on the GP prior)\n * provides an additional method `sample_y(X)`, which evaluates samples\n drawn from the GPR (prior or posterior) at given inputs\n * exposes a method `log_marginal_likelihood(theta)`, which can be used\n externally for other ways of selecting hyperparameters, e.g., via\n Markov chain Monte Carlo.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of [1]_.\n\nIn addition to standard scikit-learn estimator API,\n:class:`GaussianProcessRegressor`:\n\n * allows prediction without prior fitting (based on the GP prior)\n * provides an additional method `sample_y(X)`, which evaluates samples\n drawn from the GPR (prior or posterior) at given inputs\n * exposes a method `log_marginal_likelihood(theta)`, which can be used\n externally for other ways of selecting hyperparameters, e.g., via\n Markov chain Monte Carlo.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : kernel instance, default=None\n The kernel specifying the covariance function of the GP. If None is\n passed, the kernel ``ConstantKernel(1.0, constant_value_bounds=\"fixed\")\n * RBF(1.0, length_scale_bounds=\"fixed\")`` is used as default. Note that\n the kernel hyperparameters are optimized during fitting unless the\n bounds are marked as \"fixed\".\n\nalpha : float or ndarray of shape (n_samples,), default=1e-10\n Value added to the diagonal of the kernel matrix during fitting.\n This can prevent a potential numerical issue during fitting, by\n ensuring that the calculated values form a positive definite matrix.\n It can also be interpreted as the variance of additional Gaussian\n measurement noise on the training observations. Note that this is\n different from using a `WhiteKernel`. If an array is passed, it must\n have the same number of entries as the data used for fitting and is\n used as datapoint-dependent noise level. Allowing to specify the\n noise level directly as a parameter is mainly for convenience and\n for consistency with :class:`~sklearn.linear_model.Ridge`.\n\noptimizer : \"fmin_l_bfgs_b\" or callable, default=\"fmin_l_bfgs_b\"\n Can either be one of the internally supported optimizers for optimizing\n the kernel's parameters, specified by a string, or an externally\n defined optimizer passed as a callable. If a callable is passed, it\n must have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func': the objective function to be minimized, which\n # takes the hyperparameters theta as a parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\n Per default, the L-BFGS-B algorithm from `scipy.optimize.minimize`\n is used. 
If None is passed, the kernel's parameters are kept fixed.\n Available internal optimizers are: `{'fmin_l_bfgs_b'}`.\n\nn_restarts_optimizer : int, default=0\n The number of restarts of the optimizer for finding the kernel's\n parameters which maximize the log-marginal likelihood. The first run\n of the optimizer is performed from the kernel's initial parameters,\n the remaining ones (if any) from thetas sampled log-uniform randomly\n from the space of allowed theta-values. If greater than 0, all bounds\n must be finite. Note that `n_restarts_optimizer == 0` implies that one\n run is performed.\n\nnormalize_y : bool, default=False\n Whether or not to normalize the target values `y` by removing the mean\n and scaling to unit-variance. This is recommended for cases where\n zero-mean, unit-variance priors are used. Note that, in this\n implementation, the normalisation is reversed before the GP predictions\n are reported.\n\n .. versionchanged:: 0.23\n\ncopy_X_train : bool, default=True\n If True, a persistent copy of the training data is stored in the\n object. Otherwise, just a reference to the training data is stored,\n which might cause predictions to change if the data is modified\n externally.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation used to initialize the centers.\n Pass an int for reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nX_train_ : array-like of shape (n_samples, n_features) or list of object\n Feature vectors or other representations of training data (also\n required for prediction).\n\ny_train_ : array-like of shape (n_samples,) or (n_samples, n_targets)\n Target values in training data (also required for prediction).\n\nkernel_ : kernel instance\n The kernel used for prediction. The structure of the kernel is the\n same as the one passed as parameter but with optimized hyperparameters.\n\nL_ : array-like of shape (n_samples, n_samples)\n Lower-triangular Cholesky decomposition of the kernel in ``X_train_``.\n\nalpha_ : array-like of shape (n_samples,)\n Dual coefficients of training data points in kernel space.\n\nlog_marginal_likelihood_value_ : float\n The log-marginal-likelihood of ``self.kernel_.theta``.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nGaussianProcessClassifier : Gaussian process classification (GPC)\n based on Laplace approximation.\n\nReferences\n----------\n.. [1] `Rasmussen, Carl Edward.\n \"Gaussian processes in machine learning.\"\n Summer school on machine learning. Springer, Berlin, Heidelberg, 2003\n `_.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = DotProduct() + WhiteKernel()\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... 
random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3680...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([653.0..., 592.1...]), array([316.6..., 316.6...]))" }, { @@ -24078,7 +24078,7 @@ ], "is_public": true, "reexported_by": [], - "description": "Constant kernel.\n\nCan be used as part of a product-kernel where it scales the magnitude of\nthe other factor (kernel) or as part of a sum-kernel, where it modifies\nthe mean of the Gaussian process.\n\n.. math::\nk(x_1, x_2) = constant\\_value \\;\\forall\\; x_1, x_2\n\nAdding a constant kernel is equivalent to adding a constant::\n\nkernel = RBF() + ConstantKernel(constant_value=2)\n\nis the same as::\n\nkernel = RBF() + 2\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "Constant kernel.\n\nCan be used as part of a product-kernel where it scales the magnitude of\nthe other factor (kernel) or as part of a sum-kernel, where it modifies\nthe mean of the Gaussian process.\n\n.. math::\n k(x_1, x_2) = constant\\_value \\;\\forall\\; x_1, x_2\n\nAdding a constant kernel is equivalent to adding a constant::\n\n kernel = RBF() + ConstantKernel(constant_value=2)\n\nis the same as::\n\n kernel = RBF() + 2\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "Constant kernel.\n\nCan be used as part of a product-kernel where it scales the magnitude of\nthe other factor (kernel) or as part of a sum-kernel, where it modifies\nthe mean of the Gaussian process.\n\n.. math::\n k(x_1, x_2) = constant\\_value \\;\\forall\\; x_1, x_2\n\nAdding a constant kernel is equivalent to adding a constant::\n\n kernel = RBF() + ConstantKernel(constant_value=2)\n\nis the same as::\n\n kernel = RBF() + 2\n\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nconstant_value : float, default=1.0\n The constant value which defines the covariance:\n k(x_1, x_2) = constant_value\n\nconstant_value_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on `constant_value`.\n If set to \"fixed\", `constant_value` cannot be changed during\n hyperparameter tuning.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import RBF, ConstantKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = RBF() + ConstantKernel(constant_value=2)\n>>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3696...\n>>> gpr.predict(X[:1,:], return_std=True)\n(array([606.1...]), array([0.24...]))" }, { @@ -24097,7 +24097,7 @@ ], "is_public": true, "reexported_by": [], - "description": "Dot-Product kernel.\n\nThe DotProduct kernel is non-stationary and can be obtained from linear\nregression by putting :math:`N(0, 1)` priors on the coefficients\nof :math:`x_d (d = 1, . . . , D)` and a prior of :math:`N(0, \\sigma_0^2)`\non the bias. The DotProduct kernel is invariant to a rotation of\nthe coordinates about the origin, but not translations.\nIt is parameterized by a parameter sigma_0 :math:`\\sigma`\nwhich controls the inhomogenity of the kernel. For :math:`\\sigma_0^2 =0`,\nthe kernel is called the homogeneous linear kernel, otherwise\nit is inhomogeneous. The kernel is given by\n\n.. 
math::\nk(x_i, x_j) = \\sigma_0 ^ 2 + x_i \\cdot x_j\n\nThe DotProduct kernel is commonly combined with exponentiation.\n\nSee [1]_, Chapter 4, Section 4.2, for further details regarding the\nDotProduct kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "Dot-Product kernel.\n\nThe DotProduct kernel is non-stationary and can be obtained from linear\nregression by putting :math:`N(0, 1)` priors on the coefficients\nof :math:`x_d (d = 1, . . . , D)` and a prior of :math:`N(0, \\sigma_0^2)`\non the bias. The DotProduct kernel is invariant to a rotation of\nthe coordinates about the origin, but not translations.\nIt is parameterized by a parameter sigma_0 :math:`\\sigma`\nwhich controls the inhomogenity of the kernel. For :math:`\\sigma_0^2 =0`,\nthe kernel is called the homogeneous linear kernel, otherwise\nit is inhomogeneous. The kernel is given by\n\n.. math::\n k(x_i, x_j) = \\sigma_0 ^ 2 + x_i \\cdot x_j\n\nThe DotProduct kernel is commonly combined with exponentiation.\n\nSee [1]_, Chapter 4, Section 4.2, for further details regarding the\nDotProduct kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "Dot-Product kernel.\n\nThe DotProduct kernel is non-stationary and can be obtained from linear\nregression by putting :math:`N(0, 1)` priors on the coefficients\nof :math:`x_d (d = 1, . . . , D)` and a prior of :math:`N(0, \\sigma_0^2)`\non the bias. The DotProduct kernel is invariant to a rotation of\nthe coordinates about the origin, but not translations.\nIt is parameterized by a parameter sigma_0 :math:`\\sigma`\nwhich controls the inhomogenity of the kernel. For :math:`\\sigma_0^2 =0`,\nthe kernel is called the homogeneous linear kernel, otherwise\nit is inhomogeneous. The kernel is given by\n\n.. math::\n k(x_i, x_j) = \\sigma_0 ^ 2 + x_i \\cdot x_j\n\nThe DotProduct kernel is commonly combined with exponentiation.\n\nSee [1]_, Chapter 4, Section 4.2, for further details regarding the\nDotProduct kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nsigma_0 : float >= 0, default=1.0\n Parameter controlling the inhomogenity of the kernel. If sigma_0=0,\n the kernel is homogeneous.\n\nsigma_0_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'sigma_0'.\n If set to \"fixed\", 'sigma_0' cannot be changed during\n hyperparameter tuning.\n\nReferences\n----------\n.. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". The MIT Press.\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = DotProduct() + WhiteKernel()\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3680...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([653.0..., 592.1...]), array([316.6..., 316.6...]))" }, { @@ -24115,7 +24115,7 @@ ], "is_public": true, "reexported_by": [], - "description": "Exp-Sine-Squared kernel (aka periodic kernel).\n\nThe ExpSineSquared kernel allows one to model functions which repeat\nthemselves exactly. 
It is parameterized by a length scale\nparameter :math:`l>0` and a periodicity parameter :math:`p>0`.\nOnly the isotropic variant where :math:`l` is a scalar is\nsupported at the moment. The kernel is given by:\n\n.. math::\nk(x_i, x_j) = \\text{exp}\\left(-\n\\frac{ 2\\sin^2(\\pi d(x_i, x_j)/p) }{ l^ 2} \\right)\n\nwhere :math:`l` is the length scale of the kernel, :math:`p` the\nperiodicity of the kernel and :math:`d(\\\\cdot,\\\\cdot)` is the\nEuclidean distance.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "Exp-Sine-Squared kernel (aka periodic kernel).\n\nThe ExpSineSquared kernel allows one to model functions which repeat\nthemselves exactly. It is parameterized by a length scale\nparameter :math:`l>0` and a periodicity parameter :math:`p>0`.\nOnly the isotropic variant where :math:`l` is a scalar is\nsupported at the moment. The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\text{exp}\\left(-\n \\frac{ 2\\sin^2(\\pi d(x_i, x_j)/p) }{ l^ 2} \\right)\n\nwhere :math:`l` is the length scale of the kernel, :math:`p` the\nperiodicity of the kernel and :math:`d(\\\\cdot,\\\\cdot)` is the\nEuclidean distance.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "Exp-Sine-Squared kernel (aka periodic kernel).\n\nThe ExpSineSquared kernel allows one to model functions which repeat\nthemselves exactly. It is parameterized by a length scale\nparameter :math:`l>0` and a periodicity parameter :math:`p>0`.\nOnly the isotropic variant where :math:`l` is a scalar is\nsupported at the moment. The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\text{exp}\\left(-\n \\frac{ 2\\sin^2(\\pi d(x_i, x_j)/p) }{ l^ 2} \\right)\n\nwhere :math:`l` is the length scale of the kernel, :math:`p` the\nperiodicity of the kernel and :math:`d(\\\\cdot,\\\\cdot)` is the\nEuclidean distance.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\n\nlength_scale : float > 0, default=1.0\n The length scale of the kernel.\n\nperiodicity : float > 0, default=1.0\n The periodicity of the kernel.\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nperiodicity_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'periodicity'.\n If set to \"fixed\", 'periodicity' cannot be changed during\n hyperparameter tuning.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import ExpSineSquared\n>>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0)\n>>> kernel = ExpSineSquared(length_scale=1, periodicity=1)\n>>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.0144...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([425.6..., 457.5...]), array([0.3894..., 0.3467...]))" }, { @@ -24140,7 +24140,7 @@ ], "is_public": true, "reexported_by": [], - "description": "The Exponentiation kernel takes one base kernel and a scalar parameter\n:math:`p` and combines them via\n\n.. math::\nk_{exp}(X, Y) = k(X, Y) ^p\n\nNote that the `__pow__` magic method is overridden, so\n`Exponentiation(RBF(), 2)` is equivalent to using the ** operator\nwith `RBF() ** 2`.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18", + "description": "The Exponentiation kernel takes one base kernel and a scalar parameter\n:math:`p` and combines them via\n\n.. math::\n k_{exp}(X, Y) = k(X, Y) ^p\n\nNote that the `__pow__` magic method is overridden, so\n`Exponentiation(RBF(), 2)` is equivalent to using the ** operator\nwith `RBF() ** 2`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "The Exponentiation kernel takes one base kernel and a scalar parameter\n:math:`p` and combines them via\n\n.. math::\n k_{exp}(X, Y) = k(X, Y) ^p\n\nNote that the `__pow__` magic method is overridden, so\n`Exponentiation(RBF(), 2)` is equivalent to using the ** operator\nwith `RBF() ** 2`.\n\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nkernel : Kernel\n The base kernel\n\nexponent : float\n The exponent for the base kernel\n\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import (RationalQuadratic,\n... Exponentiation)\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = Exponentiation(RationalQuadratic(), exponent=2)\n>>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.419...\n>>> gpr.predict(X[:1,:], return_std=True)\n(array([635.5...]), array([0.559...]))" }, { @@ -24238,7 +24238,7 @@ ], "is_public": true, "reexported_by": [], - "description": "Matern kernel.\n\nThe class of Matern kernels is a generalization of the :class:`RBF`.\nIt has an additional parameter :math:`\\nu` which controls the\nsmoothness of the resulting function. The smaller :math:`\\nu`,\nthe less smooth the approximated function is.\nAs :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to\nthe :class:`RBF` kernel. When :math:`\\nu = 1/2`, the Mat\u00e9rn kernel\nbecomes identical to the absolute exponential kernel.\nImportant intermediate values are\n:math:`\\nu=1.5` (once differentiable functions)\nand :math:`\\nu=2.5` (twice differentiable functions).\n\nThe kernel is given by:\n\n.. math::\nk(x_i, x_j) = \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(\n\\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\n\\Bigg)^\\nu K_\\nu\\Bigg(\n\\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)\n\nwhere :math:`d(\\cdot,\\cdot)` is the Euclidean distance,\n:math:`K_{\\nu}(\\cdot)` is a modified Bessel function and\n:math:`\\Gamma(\\cdot)` is the gamma function.\nSee [1]_, Chapter 4, Section 4.2, for details regarding the different\nvariants of the Matern kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "Matern kernel.\n\nThe class of Matern kernels is a generalization of the :class:`RBF`.\nIt has an additional parameter :math:`\\nu` which controls the\nsmoothness of the resulting function. The smaller :math:`\\nu`,\nthe less smooth the approximated function is.\nAs :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to\nthe :class:`RBF` kernel. When :math:`\\nu = 1/2`, the Mat\u00e9rn kernel\nbecomes identical to the absolute exponential kernel.\nImportant intermediate values are\n:math:`\\nu=1.5` (once differentiable functions)\nand :math:`\\nu=2.5` (twice differentiable functions).\n\nThe kernel is given by:\n\n.. 
math::\n k(x_i, x_j) = \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\n \\Bigg)^\\nu K_\\nu\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)\n\nwhere :math:`d(\\cdot,\\cdot)` is the Euclidean distance,\n:math:`K_{\\nu}(\\cdot)` is a modified Bessel function and\n:math:`\\Gamma(\\cdot)` is the gamma function.\nSee [1]_, Chapter 4, Section 4.2, for details regarding the different\nvariants of the Matern kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "Matern kernel.\n\nThe class of Matern kernels is a generalization of the :class:`RBF`.\nIt has an additional parameter :math:`\\nu` which controls the\nsmoothness of the resulting function. The smaller :math:`\\nu`,\nthe less smooth the approximated function is.\nAs :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to\nthe :class:`RBF` kernel. When :math:`\\nu = 1/2`, the Mat\u00e9rn kernel\nbecomes identical to the absolute exponential kernel.\nImportant intermediate values are\n:math:`\\nu=1.5` (once differentiable functions)\nand :math:`\\nu=2.5` (twice differentiable functions).\n\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\n \\Bigg)^\\nu K_\\nu\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)\n\n\n\nwhere :math:`d(\\cdot,\\cdot)` is the Euclidean distance,\n:math:`K_{\\nu}(\\cdot)` is a modified Bessel function and\n:math:`\\Gamma(\\cdot)` is the gamma function.\nSee [1]_, Chapter 4, Section 4.2, for details regarding the different\nvariants of the Matern kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nlength_scale : float or ndarray of shape (n_features,), default=1.0\n The length scale of the kernel. If a float, an isotropic kernel is\n used. If an array, an anisotropic kernel is used where each dimension\n of l defines the length-scale of the respective feature dimension.\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nnu : float, default=1.5\n The parameter nu controlling the smoothness of the learned function.\n The smaller nu, the less smooth the approximated function is.\n For nu=inf, the kernel becomes equivalent to the RBF kernel and for\n nu=0.5 to the absolute exponential kernel. Important intermediate\n values are nu=1.5 (once differentiable functions) and nu=2.5\n (twice differentiable functions). Note that values of nu not in\n [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost\n (appr. 10 times higher) since they require to evaluate the modified\n Bessel function. Furthermore, in contrast to l, nu is kept fixed to\n its initial value and not optimized.\n\nReferences\n----------\n.. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". The MIT Press.\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import Matern\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = 1.0 * Matern(length_scale=1.0, nu=1.5)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... 
random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9866...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8513..., 0.0368..., 0.1117...],\n [0.8086..., 0.0693..., 0.1220...]])" }, { @@ -24269,7 +24269,7 @@ ], "is_public": true, "reexported_by": [], - "description": "Wrapper for kernels in sklearn.metrics.pairwise.\n\nA thin wrapper around the functionality of the kernels in\nsklearn.metrics.pairwise.\n\nNote: Evaluation of eval_gradient is not analytic but numeric and all\nkernels support only isotropic distances. The parameter gamma is\nconsidered to be a hyperparameter and may be optimized. The other\nkernel parameters are set directly at initialization and are kept\nfixed.\n\n.. versionadded:: 0.18", + "description": "Wrapper for kernels in sklearn.metrics.pairwise.\n\nA thin wrapper around the functionality of the kernels in\nsklearn.metrics.pairwise.\n\nNote: Evaluation of eval_gradient is not analytic but numeric and all\n kernels support only isotropic distances. The parameter gamma is\n considered to be a hyperparameter and may be optimized. The other\n kernel parameters are set directly at initialization and are kept\n fixed.\n\n.. versionadded:: 0.18", "docstring": "Wrapper for kernels in sklearn.metrics.pairwise.\n\nA thin wrapper around the functionality of the kernels in\nsklearn.metrics.pairwise.\n\nNote: Evaluation of eval_gradient is not analytic but numeric and all\n kernels support only isotropic distances. The parameter gamma is\n considered to be a hyperparameter and may be optimized. The other\n kernel parameters are set directly at initialization and are kept\n fixed.\n\n.. versionadded:: 0.18\n\nParameters\n----------\ngamma : float, default=1.0\n Parameter gamma of the pairwise kernel specified by metric. It should\n be positive.\n\ngamma_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'gamma'.\n If set to \"fixed\", 'gamma' cannot be changed during\n hyperparameter tuning.\n\nmetric : {\"linear\", \"additive_chi2\", \"chi2\", \"poly\", \"polynomial\", \"rbf\", \"laplacian\", \"sigmoid\", \"cosine\"} or callable, default=\"linear\"\n The metric to use when calculating kernel between instances in a\n feature array. If metric is a string, it must be one of the metrics\n in pairwise.PAIRWISE_KERNEL_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\npairwise_kernels_kwargs : dict, default=None\n All entries of this dict (if any) are passed as keyword arguments to\n the pairwise kernel function.\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import PairwiseKernel\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = PairwiseKernel(metric='rbf')\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9733...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8880..., 0.05663..., 0.05532...],\n [0.8676..., 0.07073..., 0.06165...]])" }, { @@ -24285,7 +24285,7 @@ ], "is_public": true, "reexported_by": [], - "description": "The `Product` kernel takes two kernels :math:`k_1` and :math:`k_2`\nand combines them via\n\n.. 
math::\nk_{prod}(X, Y) = k_1(X, Y) * k_2(X, Y)\n\nNote that the `__mul__` magic method is overridden, so\n`Product(RBF(), RBF())` is equivalent to using the * operator\nwith `RBF() * RBF()`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "The `Product` kernel takes two kernels :math:`k_1` and :math:`k_2`\nand combines them via\n\n.. math::\n k_{prod}(X, Y) = k_1(X, Y) * k_2(X, Y)\n\nNote that the `__mul__` magic method is overridden, so\n`Product(RBF(), RBF())` is equivalent to using the * operator\nwith `RBF() * RBF()`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "The `Product` kernel takes two kernels :math:`k_1` and :math:`k_2`\nand combines them via\n\n.. math::\n k_{prod}(X, Y) = k_1(X, Y) * k_2(X, Y)\n\nNote that the `__mul__` magic method is overridden, so\n`Product(RBF(), RBF())` is equivalent to using the * operator\nwith `RBF() * RBF()`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nk1 : Kernel\n The first base-kernel of the product-kernel\n\nk2 : Kernel\n The second base-kernel of the product-kernel\n\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import (RBF, Product,\n... ConstantKernel)\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = Product(ConstantKernel(2), RBF())\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n1.0\n>>> kernel\n1.41**2 * RBF(length_scale=1)" }, { @@ -24303,7 +24303,7 @@ ], "is_public": true, "reexported_by": [], - "description": "Radial-basis function kernel (aka squared-exponential kernel).\n\nThe RBF kernel is a stationary kernel. It is also known as the\n\"squared exponential\" kernel. It is parameterized by a length scale\nparameter :math:`l>0`, which can either be a scalar (isotropic variant\nof the kernel) or a vector with the same number of dimensions as the inputs\nX (anisotropic variant of the kernel). The kernel is given by:\n\n.. math::\nk(x_i, x_j) = \\exp\\left(- \\frac{d(x_i, x_j)^2}{2l^2} \\right)\n\nwhere :math:`l` is the length scale of the kernel and\n:math:`d(\\cdot,\\cdot)` is the Euclidean distance.\nFor advice on how to set the length scale parameter, see e.g. [1]_.\n\nThis kernel is infinitely differentiable, which implies that GPs with this\nkernel as covariance function have mean square derivatives of all orders,\nand are thus very smooth.\nSee [2]_, Chapter 4, Section 4.2, for further details of the RBF kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "Radial-basis function kernel (aka squared-exponential kernel).\n\nThe RBF kernel is a stationary kernel. It is also known as the\n\"squared exponential\" kernel. It is parameterized by a length scale\nparameter :math:`l>0`, which can either be a scalar (isotropic variant\nof the kernel) or a vector with the same number of dimensions as the inputs\nX (anisotropic variant of the kernel). The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\exp\\left(- \\frac{d(x_i, x_j)^2}{2l^2} \\right)\n\nwhere :math:`l` is the length scale of the kernel and\n:math:`d(\\cdot,\\cdot)` is the Euclidean distance.\nFor advice on how to set the length scale parameter, see e.g. 
[1]_.\n\nThis kernel is infinitely differentiable, which implies that GPs with this\nkernel as covariance function have mean square derivatives of all orders,\nand are thus very smooth.\nSee [2]_, Chapter 4, Section 4.2, for further details of the RBF kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "Radial-basis function kernel (aka squared-exponential kernel).\n\nThe RBF kernel is a stationary kernel. It is also known as the\n\"squared exponential\" kernel. It is parameterized by a length scale\nparameter :math:`l>0`, which can either be a scalar (isotropic variant\nof the kernel) or a vector with the same number of dimensions as the inputs\nX (anisotropic variant of the kernel). The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\exp\\left(- \\frac{d(x_i, x_j)^2}{2l^2} \\right)\n\nwhere :math:`l` is the length scale of the kernel and\n:math:`d(\\cdot,\\cdot)` is the Euclidean distance.\nFor advice on how to set the length scale parameter, see e.g. [1]_.\n\nThis kernel is infinitely differentiable, which implies that GPs with this\nkernel as covariance function have mean square derivatives of all orders,\nand are thus very smooth.\nSee [2]_, Chapter 4, Section 4.2, for further details of the RBF kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nlength_scale : float or ndarray of shape (n_features,), default=1.0\n The length scale of the kernel. If a float, an isotropic kernel is\n used. If an array, an anisotropic kernel is used where each dimension\n of l defines the length-scale of the respective feature dimension.\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nReferences\n----------\n.. [1] `David Duvenaud (2014). \"The Kernel Cookbook:\n Advice on Covariance functions\".\n `_\n\n.. [2] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).\n \"Gaussian Processes for Machine Learning\". The MIT Press.\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import RBF\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = 1.0 * RBF(1.0)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9866...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8354..., 0.03228..., 0.1322...],\n [0.7906..., 0.0652..., 0.1441...]])" }, { @@ -24321,7 +24321,7 @@ ], "is_public": true, "reexported_by": [], - "description": "Rational Quadratic kernel.\n\nThe RationalQuadratic kernel can be seen as a scale mixture (an infinite\nsum) of RBF kernels with different characteristic length scales. It is\nparameterized by a length scale parameter :math:`l>0` and a scale\nmixture parameter :math:`\\alpha>0`. Only the isotropic variant\nwhere length_scale :math:`l` is a scalar is supported at the moment.\nThe kernel is given by:\n\n.. math::\nk(x_i, x_j) = \\left(\n1 + \\frac{d(x_i, x_j)^2 }{ 2\\alpha l^2}\\right)^{-\\alpha}\n\nwhere :math:`\\alpha` is the scale mixture parameter, :math:`l` is\nthe length scale of the kernel and :math:`d(\\cdot,\\cdot)` is the\nEuclidean distance.\nFor advice on how to set the parameters, see e.g. [1]_.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18", + "description": "Rational Quadratic kernel.\n\nThe RationalQuadratic kernel can be seen as a scale mixture (an infinite\nsum) of RBF kernels with different characteristic length scales. It is\nparameterized by a length scale parameter :math:`l>0` and a scale\nmixture parameter :math:`\\alpha>0`. Only the isotropic variant\nwhere length_scale :math:`l` is a scalar is supported at the moment.\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\left(\n 1 + \\frac{d(x_i, x_j)^2 }{ 2\\alpha l^2}\\right)^{-\\alpha}\n\nwhere :math:`\\alpha` is the scale mixture parameter, :math:`l` is\nthe length scale of the kernel and :math:`d(\\cdot,\\cdot)` is the\nEuclidean distance.\nFor advice on how to set the parameters, see e.g. [1]_.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "Rational Quadratic kernel.\n\nThe RationalQuadratic kernel can be seen as a scale mixture (an infinite\nsum) of RBF kernels with different characteristic length scales. It is\nparameterized by a length scale parameter :math:`l>0` and a scale\nmixture parameter :math:`\\alpha>0`. Only the isotropic variant\nwhere length_scale :math:`l` is a scalar is supported at the moment.\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\left(\n 1 + \\frac{d(x_i, x_j)^2 }{ 2\\alpha l^2}\\right)^{-\\alpha}\n\nwhere :math:`\\alpha` is the scale mixture parameter, :math:`l` is\nthe length scale of the kernel and :math:`d(\\cdot,\\cdot)` is the\nEuclidean distance.\nFor advice on how to set the parameters, see e.g. [1]_.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nlength_scale : float > 0, default=1.0\n The length scale of the kernel.\n\nalpha : float > 0, default=1.0\n Scale mixture parameter\n\nlength_scale_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'length_scale'.\n If set to \"fixed\", 'length_scale' cannot be changed during\n hyperparameter tuning.\n\nalpha_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'alpha'.\n If set to \"fixed\", 'alpha' cannot be changed during\n hyperparameter tuning.\n\nReferences\n----------\n.. [1] `David Duvenaud (2014). \"The Kernel Cookbook:\n Advice on Covariance functions\".\n `_\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.gaussian_process import GaussianProcessClassifier\n>>> from sklearn.gaussian_process.kernels import RationalQuadratic\n>>> X, y = load_iris(return_X_y=True)\n>>> kernel = RationalQuadratic(length_scale=1.0, alpha=1.5)\n>>> gpc = GaussianProcessClassifier(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpc.score(X, y)\n0.9733...\n>>> gpc.predict_proba(X[:2,:])\narray([[0.8881..., 0.0566..., 0.05518...],\n [0.8678..., 0.0707... , 0.0614...]])" }, { @@ -24349,7 +24349,7 @@ ], "is_public": true, "reexported_by": [], - "description": "The `Sum` kernel takes two kernels :math:`k_1` and :math:`k_2`\nand combines them via\n\n.. math::\nk_{sum}(X, Y) = k_1(X, Y) + k_2(X, Y)\n\nNote that the `__add__` magic method is overridden, so\n`Sum(RBF(), RBF())` is equivalent to using the + operator\nwith `RBF() + RBF()`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "The `Sum` kernel takes two kernels :math:`k_1` and :math:`k_2`\nand combines them via\n\n.. 
math::\n k_{sum}(X, Y) = k_1(X, Y) + k_2(X, Y)\n\nNote that the `__add__` magic method is overridden, so\n`Sum(RBF(), RBF())` is equivalent to using the + operator\nwith `RBF() + RBF()`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "The `Sum` kernel takes two kernels :math:`k_1` and :math:`k_2`\nand combines them via\n\n.. math::\n k_{sum}(X, Y) = k_1(X, Y) + k_2(X, Y)\n\nNote that the `__add__` magic method is overridden, so\n`Sum(RBF(), RBF())` is equivalent to using the + operator\nwith `RBF() + RBF()`.\n\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nk1 : Kernel\n The first base-kernel of the sum-kernel\n\nk2 : Kernel\n The second base-kernel of the sum-kernel\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import RBF, Sum, ConstantKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = Sum(ConstantKernel(2), RBF())\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n1.0\n>>> kernel\n1.41**2 + RBF(length_scale=1)" }, { @@ -24367,7 +24367,7 @@ ], "is_public": true, "reexported_by": [], - "description": "White kernel.\n\nThe main use-case of this kernel is as part of a sum-kernel where it\nexplains the noise of the signal as independently and identically\nnormally-distributed. The parameter noise_level equals the variance of this\nnoise.\n\n.. math::\nk(x_1, x_2) = noise\\_level \\text{ if } x_i == x_j \\text{ else } 0\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "White kernel.\n\nThe main use-case of this kernel is as part of a sum-kernel where it\nexplains the noise of the signal as independently and identically\nnormally-distributed. The parameter noise_level equals the variance of this\nnoise.\n\n.. math::\n k(x_1, x_2) = noise\\_level \\text{ if } x_i == x_j \\text{ else } 0\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "White kernel.\n\nThe main use-case of this kernel is as part of a sum-kernel where it\nexplains the noise of the signal as independently and identically\nnormally-distributed. The parameter noise_level equals the variance of this\nnoise.\n\n.. math::\n k(x_1, x_2) = noise\\_level \\text{ if } x_i == x_j \\text{ else } 0\n\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18\n\nParameters\n----------\nnoise_level : float, default=1.0\n Parameter controlling the noise level (variance)\n\nnoise_level_bounds : pair of floats >= 0 or \"fixed\", default=(1e-5, 1e5)\n The lower and upper bound on 'noise_level'.\n If set to \"fixed\", 'noise_level' cannot be changed during\n hyperparameter tuning.\n\nExamples\n--------\n>>> from sklearn.datasets import make_friedman2\n>>> from sklearn.gaussian_process import GaussianProcessRegressor\n>>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel\n>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)\n>>> kernel = DotProduct() + WhiteKernel(noise_level=0.5)\n>>> gpr = GaussianProcessRegressor(kernel=kernel,\n... random_state=0).fit(X, y)\n>>> gpr.score(X, y)\n0.3680...\n>>> gpr.predict(X[:2,:], return_std=True)\n(array([653.0..., 592.1... 
]), array([316.6..., 316.6...]))" }, { @@ -24411,7 +24411,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.impute"], - "description": "Imputation transformer for completing missing values.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n`SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\nestimator which is now removed.", + "description": "Imputation transformer for completing missing values.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n estimator which is now removed.", "docstring": "Imputation transformer for completing missing values.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n estimator which is now removed.\n\nParameters\n----------\nmissing_values : int, float, str, np.nan, None or pandas.NA, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n can be set to either `np.nan` or `pd.NA`.\n\nstrategy : str, default='mean'\n The imputation strategy.\n\n - If \"mean\", then replace missing values using the mean along\n each column. Can only be used with numeric data.\n - If \"median\", then replace missing values using the median along\n each column. Can only be used with numeric data.\n - If \"most_frequent\", then replace missing using the most frequent\n value along each column. Can be used with strings or numeric data.\n If there is more than one such value, only the smallest is returned.\n - If \"constant\", then replace missing values with fill_value. Can be\n used with strings or numeric data.\n\n .. versionadded:: 0.20\n strategy=\"constant\" for fixed value imputation.\n\nfill_value : str or numerical value, default=None\n When strategy == \"constant\", fill_value is used to replace all\n occurrences of missing_values.\n If left to the default, fill_value will be 0 when imputing numerical\n data and \"missing_value\" for strings or object data types.\n\nverbose : int, default=0\n Controls the verbosity of the imputer.\n\n .. deprecated:: 1.1\n The 'verbose' parameter was deprecated in version 1.1 and will be\n removed in 1.3. A warning will always be raised upon the removal of\n empty columns in the future version.\n\ncopy : bool, default=True\n If True, a copy of X will be created. If False, imputation will\n be done in-place whenever possible. Note that, in the following cases,\n a new copy will always be made, even if `copy=False`:\n\n - If `X` is not an array of floating values;\n - If `X` is encoded as a CSR matrix;\n - If `add_indicator=True`.\n\nadd_indicator : bool, default=False\n If True, a :class:`MissingIndicator` transform will stack onto output\n of the imputer's transform. This allows a predictive estimator\n to account for missingness despite imputation. 
If a feature has no\n missing values at fit/train time, the feature won't appear on\n the missing indicator even if there are missing values at\n transform/test time.\n\nAttributes\n----------\nstatistics_ : array of shape (n_features,)\n The imputation fill value for each feature.\n Computing statistics can result in `np.nan` values.\n During :meth:`transform`, features corresponding to `np.nan`\n statistics will be discarded.\n\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n Indicator used to add binary indicators for missing values.\n `None` if `add_indicator=False`.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nIterativeImputer : Multivariate imputation of missing values.\n\nNotes\n-----\nColumns which only contained missing values at :meth:`fit` are discarded\nupon :meth:`transform` if strategy is not `\"constant\"`.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.impute import SimpleImputer\n>>> imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n>>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\nSimpleImputer()\n>>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n>>> print(imp_mean.transform(X))\n[[ 7. 2. 3. ]\n [ 4. 3.5 6. ]\n [10. 3.5 9. ]]" }, { @@ -24454,7 +24454,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.impute"], - "description": "Multivariate imputer that estimates each feature from all the others.\n\nA strategy for imputing missing values by modeling each feature with\nmissing values as a function of other features in a round-robin fashion.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\n.. note::\n\nThis estimator is still **experimental** for now: the predictions\nand the API might change without any deprecation cycle. To use it,\nyou need to explicitly import `enable_iterative_imputer`::\n\n>>> # explicitly require this experimental feature\n>>> from sklearn.experimental import enable_iterative_imputer # noqa\n>>> # now you can import normally from sklearn.impute\n>>> from sklearn.impute import IterativeImputer", + "description": "Multivariate imputer that estimates each feature from all the others.\n\nA strategy for imputing missing values by modeling each feature with\nmissing values as a function of other features in a round-robin fashion.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import `enable_iterative_imputer`::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_iterative_imputer # noqa\n >>> # now you can import normally from sklearn.impute\n >>> from sklearn.impute import IterativeImputer", "docstring": "Multivariate imputer that estimates each feature from all the others.\n\nA strategy for imputing missing values by modeling each feature with\nmissing values as a function of other features in a round-robin fashion.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. 
To use it,\n you need to explicitly import `enable_iterative_imputer`::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_iterative_imputer # noqa\n >>> # now you can import normally from sklearn.impute\n >>> from sklearn.impute import IterativeImputer\n\nParameters\n----------\nestimator : estimator object, default=BayesianRidge()\n The estimator to use at each step of the round-robin imputation.\n If `sample_posterior=True`, the estimator must support\n `return_std` in its `predict` method.\n\nmissing_values : int or np.nan, default=np.nan\n The placeholder for the missing values. All occurrences of\n `missing_values` will be imputed. For pandas' dataframes with\n nullable integer dtypes with missing values, `missing_values`\n should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.\n\nsample_posterior : bool, default=False\n Whether to sample from the (Gaussian) predictive posterior of the\n fitted estimator for each imputation. Estimator must support\n `return_std` in its `predict` method if set to `True`. Set to\n `True` if using `IterativeImputer` for multiple imputations.\n\nmax_iter : int, default=10\n Maximum number of imputation rounds to perform before returning the\n imputations computed during the final round. A round is a single\n imputation of each feature with missing values. The stopping criterion\n is met once `max(abs(X_t - X_{t-1}))/max(abs(X[known_vals])) < tol`,\n where `X_t` is `X` at iteration `t`. Note that early stopping is only\n applied if `sample_posterior=False`.\n\ntol : float, default=1e-3\n Tolerance of the stopping condition.\n\nn_nearest_features : int, default=None\n Number of other features to use to estimate the missing values of\n each feature column. Nearness between features is measured using\n the absolute correlation coefficient between each feature pair (after\n initial imputation). To ensure coverage of features throughout the\n imputation process, the neighbor features are not necessarily nearest,\n but are drawn with probability proportional to correlation for each\n imputed target feature. Can provide significant speed-up when the\n number of features is huge. If `None`, all features will be used.\n\ninitial_strategy : {'mean', 'median', 'most_frequent', 'constant'}, default='mean'\n Which strategy to use to initialize the missing values. Same as the\n `strategy` parameter in :class:`~sklearn.impute.SimpleImputer`.\n\nimputation_order : {'ascending', 'descending', 'roman', 'arabic', 'random'}, default='ascending'\n The order in which the features will be imputed. Possible values:\n\n - `'ascending'`: From features with fewest missing values to most.\n - `'descending'`: From features with most missing values to fewest.\n - `'roman'`: Left to right.\n - `'arabic'`: Right to left.\n - `'random'`: A random order for each round.\n\nskip_complete : bool, default=False\n If `True` then features with missing values during :meth:`transform`\n which did not have any missing values during :meth:`fit` will be\n imputed with the initial imputation method only. Set to `True` if you\n have many features with no missing values at both :meth:`fit` and\n :meth:`transform` time to save compute.\n\nmin_value : float or array-like of shape (n_features,), default=-np.inf\n Minimum possible imputed value. Broadcast to shape `(n_features,)` if\n scalar. If array-like, expects shape `(n_features,)`, one min value for\n each feature. The default is `-np.inf`.\n\n .. 
versionchanged:: 0.23\n Added support for array-like.\n\nmax_value : float or array-like of shape (n_features,), default=np.inf\n Maximum possible imputed value. Broadcast to shape `(n_features,)` if\n scalar. If array-like, expects shape `(n_features,)`, one max value for\n each feature. The default is `np.inf`.\n\n .. versionchanged:: 0.23\n Added support for array-like.\n\nverbose : int, default=0\n Verbosity flag, controls the debug messages that are issued\n as functions are evaluated. The higher, the more verbose. Can be 0, 1,\n or 2.\n\nrandom_state : int, RandomState instance or None, default=None\n The seed of the pseudo random number generator to use. Randomizes\n selection of estimator features if `n_nearest_features` is not `None`,\n the `imputation_order` if `random`, and the sampling from posterior if\n `sample_posterior=True`. Use an integer for determinism.\n See :term:`the Glossary `.\n\nadd_indicator : bool, default=False\n If `True`, a :class:`MissingIndicator` transform will stack onto output\n of the imputer's transform. This allows a predictive estimator\n to account for missingness despite imputation. If a feature has no\n missing values at fit/train time, the feature won't appear on\n the missing indicator even if there are missing values at\n transform/test time.\n\nAttributes\n----------\ninitial_imputer_ : object of type :class:`~sklearn.impute.SimpleImputer`\n Imputer used to initialize the missing values.\n\nimputation_sequence_ : list of tuples\n Each tuple has `(feat_idx, neighbor_feat_idx, estimator)`, where\n `feat_idx` is the current feature to be imputed,\n `neighbor_feat_idx` is the array of other features used to impute the\n current feature, and `estimator` is the trained estimator used for\n the imputation. Length is `self.n_features_with_missing_ *\n self.n_iter_`.\n\nn_iter_ : int\n Number of iteration rounds that occurred. Will be less than\n `self.max_iter` if early stopping criterion was reached.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nn_features_with_missing_ : int\n Number of features with missing values.\n\nindicator_ : :class:`~sklearn.impute.MissingIndicator`\n Indicator used to add binary indicators for missing values.\n `None` if `add_indicator=False`.\n\nrandom_state_ : RandomState instance\n RandomState instance that is generated either from a seed, the random\n number generator or by `np.random`.\n\nSee Also\n--------\nSimpleImputer : Univariate imputation of missing values.\n\nNotes\n-----\nTo support imputation in inductive mode we store each feature's estimator\nduring the :meth:`fit` phase, and predict without refitting (in order)\nduring the :meth:`transform` phase.\n\nFeatures which contain all missing values at :meth:`fit` are discarded upon\n:meth:`transform`.\n\nReferences\n----------\n.. [1] `Stef van Buuren, Karin Groothuis-Oudshoorn (2011). \"mice:\n Multivariate Imputation by Chained Equations in R\". Journal of\n Statistical Software 45: 1-67.\n `_\n\n.. [2] `S. F. Buck, (1960). 
\"A Method of Estimation of Missing Values in\n Multivariate Data Suitable for use with an Electronic Computer\".\n Journal of the Royal Statistical Society 22(2): 302-306.\n `_\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.experimental import enable_iterative_imputer\n>>> from sklearn.impute import IterativeImputer\n>>> imp_mean = IterativeImputer(random_state=0)\n>>> imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\nIterativeImputer(random_state=0)\n>>> X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n>>> imp_mean.transform(X)\narray([[ 6.9584..., 2. , 3. ],\n [ 4. , 2.6000..., 6. ],\n [10. , 4.9999..., 9. ]])" }, { @@ -24509,7 +24509,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.inspection"], - "description": "Partial Dependence Plot (PDP).\n\nThis can also display individual partial dependencies which are often\nreferred to as: Individual Condition Expectation (ICE).\n\nIt is recommended to use\n:func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` to create a\n:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\nstored as attributes.\n\nRead more in\n:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\nand the :ref:`User Guide `.\n\n.. versionadded:: 0.22", + "description": "Partial Dependence Plot (PDP).\n\nThis can also display individual partial dependencies which are often\nreferred to as: Individual Condition Expectation (ICE).\n\nIt is recommended to use\n:func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` to create a\n:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\nstored as attributes.\n\nRead more in\n:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\nand the :ref:`User Guide `.\n\n .. versionadded:: 0.22", "docstring": "Partial Dependence Plot (PDP).\n\nThis can also display individual partial dependencies which are often\nreferred to as: Individual Condition Expectation (ICE).\n\nIt is recommended to use\n:func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` to create a\n:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\nstored as attributes.\n\nRead more in\n:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\nand the :ref:`User Guide `.\n\n .. versionadded:: 0.22\n\nParameters\n----------\npd_results : list of Bunch\n Results of :func:`~sklearn.inspection.partial_dependence` for\n ``features``.\n\nfeatures : list of (int,) or list of (int, int)\n Indices of features for a given plot. A tuple of one integer will plot\n a partial dependence curve of one feature. A tuple of two integers will\n plot a two-way partial dependence curve as a contour plot.\n\nfeature_names : list of str\n Feature names corresponding to the indices in ``features``.\n\ntarget_idx : int\n\n - In a multiclass setting, specifies the class for which the PDPs\n should be computed. Note that for binary classification, the\n positive class (index 1) is always used.\n - In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\n Ignored in binary classification or classical regression settings.\n\ndeciles : dict\n Deciles for feature indices in ``features``.\n\npdp_lim : dict or None\n Global min and max average predictions, such that all plots will have\n the same scale and y limits. `pdp_lim[1]` is the global min and max for\n single partial dependence curves. 
`pdp_lim[2]` is the global min and\n max for two-way partial dependence curves. If `None`, the limit will be\n inferred from the global minimum and maximum of all predictions.\n\n .. deprecated:: 1.1\n Pass the parameter `pdp_lim` to\n :meth:`~sklearn.inspection.PartialDependenceDisplay.plot` instead.\n It will be removed in 1.3.\n\nkind : {'average', 'individual', 'both'} or list of such str, default='average'\n Whether to plot the partial dependence averaged across all the samples\n in the dataset or one line per sample or both.\n\n - ``kind='average'`` results in the traditional PD plot;\n - ``kind='individual'`` results in the ICE plot;\n - ``kind='both'`` results in plotting both the ICE and PD on the same\n plot.\n\n A list of such strings can be provided to specify `kind` on a per-plot\n basis. The length of the list should be the same as the number of\n interaction requested in `features`.\n\n .. note::\n ICE ('individual' or 'both') is not a valid option for 2-ways\n interactions plot. As a result, an error will be raised.\n 2-ways interaction plots should always be configured to\n use the 'average' kind instead.\n\n .. note::\n The fast ``method='recursion'`` option is only available for\n ``kind='average'``. Plotting individual dependencies requires using\n the slower ``method='brute'`` option.\n\n .. versionadded:: 0.24\n Add `kind` parameter with `'average'`, `'individual'`, and `'both'`\n options.\n\n .. versionadded:: 1.1\n Add the possibility to pass a list of string specifying `kind`\n for each plot.\n\nsubsample : float, int or None, default=1000\n Sampling for ICE curves when `kind` is 'individual' or 'both'.\n If float, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to be used to plot ICE curves. If int, represents the\n maximum absolute number of samples to use.\n\n Note that the full dataset is still used to calculate partial\n dependence when `kind='both'`.\n\n .. versionadded:: 0.24\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the selected samples when subsamples is not\n `None`. See :term:`Glossary ` for details.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nbounding_ax_ : matplotlib Axes or None\n If `ax` is an axes or None, the `bounding_ax_` is the axes where the\n grid of partial dependence plots are drawn. If `ax` is a list of axes\n or a numpy array of axes, `bounding_ax_` is None.\n\naxes_ : ndarray of matplotlib Axes\n If `ax` is an axes or None, `axes_[i, j]` is the axes on the i-th row\n and j-th column. If `ax` is a list of axes, `axes_[i]` is the i-th item\n in `ax`. Elements that are None correspond to a nonexisting axes in\n that position.\n\nlines_ : ndarray of matplotlib Artists\n If `ax` is an axes or None, `lines_[i, j]` is the partial dependence\n curve on the i-th row and j-th column. If `ax` is a list of axes,\n `lines_[i]` is the partial dependence curve corresponding to the i-th\n item in `ax`. Elements that are None correspond to a nonexisting axes\n or an axes that does not include a line plot.\n\ndeciles_vlines_ : ndarray of matplotlib LineCollection\n If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n representing the x axis deciles of the i-th row and j-th column. If\n `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n `ax`. Elements that are None correspond to a nonexisting axes or an\n axes that does not include a PDP plot.\n\n .. 
versionadded:: 0.23\n\ndeciles_hlines_ : ndarray of matplotlib LineCollection\n If `ax` is an axes or None, `vlines_[i, j]` is the line collection\n representing the y axis deciles of the i-th row and j-th column. If\n `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in\n `ax`. Elements that are None correspond to a nonexisting axes or an\n axes that does not include a 2-way plot.\n\n .. versionadded:: 0.23\n\ncontours_ : ndarray of matplotlib Artists\n If `ax` is an axes or None, `contours_[i, j]` is the partial dependence\n plot on the i-th row and j-th column. If `ax` is a list of axes,\n `contours_[i]` is the partial dependence plot corresponding to the i-th\n item in `ax`. Elements that are None correspond to a nonexisting axes\n or an axes that does not include a contour plot.\n\nfigure_ : matplotlib Figure\n Figure containing partial dependence plots.\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\nPartialDependenceDisplay.from_estimator : Plot Partial Dependence." }, { @@ -24587,7 +24587,7 @@ ], "is_public": true, "reexported_by": [], - "description": "Polynomial kernel approximation via Tensor Sketch.\n\nImplements Tensor Sketch, which approximates the feature map\nof the polynomial kernel::\n\nK(X, Y) = (gamma * + coef0)^degree\n\nby efficiently computing a Count Sketch of the outer product of a\nvector with itself using Fast Fourier Transforms (FFT). Read more in the\n:ref:`User Guide `.\n\n.. versionadded:: 0.24", + "description": "Polynomial kernel approximation via Tensor Sketch.\n\nImplements Tensor Sketch, which approximates the feature map\nof the polynomial kernel::\n\n K(X, Y) = (gamma * + coef0)^degree\n\nby efficiently computing a Count Sketch of the outer product of a\nvector with itself using Fast Fourier Transforms (FFT). Read more in the\n:ref:`User Guide `.\n\n.. versionadded:: 0.24", "docstring": "Polynomial kernel approximation via Tensor Sketch.\n\nImplements Tensor Sketch, which approximates the feature map\nof the polynomial kernel::\n\n K(X, Y) = (gamma * + coef0)^degree\n\nby efficiently computing a Count Sketch of the outer product of a\nvector with itself using Fast Fourier Transforms (FFT). Read more in the\n:ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\ngamma : float, default=1.0\n Parameter of the polynomial kernel whose feature map\n will be approximated.\n\ndegree : int, default=2\n Degree of the polynomial kernel whose feature map\n will be approximated.\n\ncoef0 : int, default=0\n Constant term of the polynomial kernel whose feature map\n will be approximated.\n\nn_components : int, default=100\n Dimensionality of the output feature space. Usually, `n_components`\n should be greater than the number of features in input samples in\n order to achieve good performance. The optimal score / run time\n balance is typically achieved around `n_components` = 10 * `n_features`,\n but this depends on the specific dataset being used.\n\nrandom_state : int, RandomState instance, default=None\n Determines random number generation for indexHash and bitHash\n initialization. Pass an int for reproducible results across multiple\n function calls. 
See :term:`Glossary `.\n\nAttributes\n----------\nindexHash_ : ndarray of shape (degree, n_features), dtype=int64\n Array of indexes in range [0, n_components) used to represent\n the 2-wise independent hash functions for Count Sketch computation.\n\nbitHash_ : ndarray of shape (degree, n_features), dtype=float32\n Array with random entries in {+1, -1}, used to represent\n the 2-wise independent hash functions for Count Sketch computation.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nAdditiveChi2Sampler : Approximate feature map for additive chi2 kernel.\nNystroem : Approximate a kernel map using a subset of the training data.\nRBFSampler : Approximate a RBF kernel feature map using random Fourier\n features.\nSkewedChi2Sampler : Approximate feature map for \"skewed chi-squared\" kernel.\nsklearn.metrics.pairwise.kernel_metrics : List of built-in kernels.\n\nExamples\n--------\n>>> from sklearn.kernel_approximation import PolynomialCountSketch\n>>> from sklearn.linear_model import SGDClassifier\n>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]]\n>>> y = [0, 0, 1, 1]\n>>> ps = PolynomialCountSketch(degree=3, random_state=1)\n>>> X_features = ps.fit_transform(X)\n>>> clf = SGDClassifier(max_iter=10, tol=1e-3)\n>>> clf.fit(X_features, y)\nSGDClassifier(max_iter=10)\n>>> clf.score(X_features, y)\n1.0" }, { @@ -24754,7 +24754,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.linear_model"], - "description": "Linear regression with combined L1 and L2 priors as regularizer.\n\nMinimizes the objective function::\n\n1 / (2 * n_samples) * ||y - Xw||^2_2\n+ alpha * l1_ratio * ||w||_1\n+ 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\na * ||w||_1 + 0.5 * b * ||w||_2^2\n\nwhere::\n\nalpha = a + b and l1_ratio = a / (a + b)\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package while\nalpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n= 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\nunless you supply your own sequence of alpha.\n\nRead more in the :ref:`User Guide `.", + "description": "Linear regression with combined L1 and L2 priors as regularizer.\n\nMinimizes the objective function::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n a * ||w||_1 + 0.5 * b * ||w||_2^2\n\nwhere::\n\n alpha = a + b and l1_ratio = a / (a + b)\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package while\nalpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n= 1 is the lasso penalty. 
Currently, l1_ratio <= 0.01 is not reliable,\nunless you supply your own sequence of alpha.\n\nRead more in the :ref:`User Guide `.", "docstring": "Linear regression with combined L1 and L2 priors as regularizer.\n\nMinimizes the objective function::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n a * ||w||_1 + 0.5 * b * ||w||_2^2\n\nwhere::\n\n alpha = a + b and l1_ratio = a / (a + b)\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package while\nalpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n= 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\nunless you supply your own sequence of alpha.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the penalty terms. Defaults to 1.0.\n See the notes for the exact mathematical meaning of this\n parameter. ``alpha = 0`` is equivalent to an ordinary least square,\n solved by the :class:`LinearRegression` object. For numerical\n reasons, using ``alpha = 0`` with the ``Lasso`` object is not advised.\n Given this, you should use the :class:`LinearRegression` object.\n\nl1_ratio : float, default=0.5\n The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``. For\n ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it\n is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a\n combination of L1 and L2.\n\nfit_intercept : bool, default=True\n Whether the intercept should be estimated or not. If ``False``, the\n data is assumed to be already centered.\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n .. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2.\n\nprecompute : bool or array-like of shape (n_features, n_features), default=False\n Whether to use a precomputed Gram matrix to speed up\n calculations. The Gram matrix can also be passed as argument.\n For sparse input this option is always ``False`` to preserve sparsity.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``, see Notes below.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\npositive : bool, default=False\n When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. 
Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\nsparse_coef_ : sparse matrix of shape (n_features,) or (n_targets, n_features)\n Sparse representation of the `coef_`.\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : list of int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n Given param alpha, the dual gaps at the end of the optimization,\n same shape as each observation of y.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nElasticNetCV : Elastic net model with best model selection by\n cross-validation.\nSGDRegressor : Implements elastic net regression with incremental training.\nSGDClassifier : Implements logistic regression with elastic net penalty\n (``SGDClassifier(loss=\"log_loss\", penalty=\"elasticnet\")``).\n\nNotes\n-----\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nThe precise stopping criteria based on `tol` are the following: First, check that\nthat maximum coordinate update, i.e. 
:math:`\\max_j |w_j^{new} - w_j^{old}|`\nis smaller than `tol` times the maximum absolute coefficient, :math:`\\max_j |w_j|`.\nIf so, then additionally check whether the dual gap is smaller than `tol` times\n:math:`||y||_2^2 / n_{ ext{samples}}`.\n\nExamples\n--------\n>>> from sklearn.linear_model import ElasticNet\n>>> from sklearn.datasets import make_regression\n\n>>> X, y = make_regression(n_features=2, random_state=0)\n>>> regr = ElasticNet(random_state=0)\n>>> regr.fit(X, y)\nElasticNet(random_state=0)\n>>> print(regr.coef_)\n[18.83816048 64.55968825]\n>>> print(regr.intercept_)\n1.451...\n>>> print(regr.predict([[0, 0]]))\n[1.451...]" }, { @@ -24783,7 +24783,7 @@ "methods": ["sklearn/sklearn.linear_model._coordinate_descent/Lasso/__init__"], "is_public": true, "reexported_by": ["sklearn/sklearn.linear_model"], - "description": "Linear Model trained with L1 prior as regularizer (aka the Lasso).\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nTechnically the Lasso model is optimizing the same objective function as\nthe Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\nRead more in the :ref:`User Guide `.", + "description": "Linear Model trained with L1 prior as regularizer (aka the Lasso).\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nTechnically the Lasso model is optimizing the same objective function as\nthe Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\nRead more in the :ref:`User Guide `.", "docstring": "Linear Model trained with L1 prior as regularizer (aka the Lasso).\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nTechnically the Lasso model is optimizing the same objective function as\nthe Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the L1 term, controlling regularization\n strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.\n\n When `alpha = 0`, the objective is equivalent to ordinary least\n squares, solved by the :class:`LinearRegression` object. For numerical\n reasons, using `alpha = 0` with the `Lasso` object is not advised.\n Instead, you should use the :class:`LinearRegression` object.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to False, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n .. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2.\n\nprecompute : bool or array-like of shape (n_features, n_features), default=False\n Whether to use a precomputed Gram matrix to speed up\n calculations. 
The Gram matrix can also be passed as argument.\n For sparse input this option is always ``False`` to preserve sparsity.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``, see Notes below.\n\nwarm_start : bool, default=False\n When set to True, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\npositive : bool, default=False\n When set to ``True``, forces the coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n Given param alpha, the dual gaps at the end of the optimization,\n same shape as each observation of y.\n\nsparse_coef_ : sparse matrix of shape (n_features, 1) or (n_targets, n_features)\n Readonly property derived from ``coef_``.\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : int or list of int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nlars_path : Regularization path using LARS.\nlasso_path : Regularization path using Lasso.\nLassoLars : Lasso Path along the regularization parameter usingLARS algorithm.\nLassoCV : Lasso alpha parameter by cross-validation.\nLassoLarsCV : Lasso least angle parameter algorithm by cross-validation.\nsklearn.decomposition.sparse_encode : Sparse coding array estimator.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nRegularization improves the conditioning of the problem and\nreduces the variance of the estimates. Larger values specify stronger\nregularization. Alpha corresponds to `1 / (2C)` in other linear\nmodels such as :class:`~sklearn.linear_model.LogisticRegression` or\n:class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are\nassumed to be specific to the targets. Hence they must correspond in\nnumber.\n\nThe precise stopping criteria based on `tol` are the following: First, check that\nthat maximum coordinate update, i.e. 
:math:`\\max_j |w_j^{new} - w_j^{old}|`\nis smaller than `tol` times the maximum absolute coefficient, :math:`\\max_j |w_j|`.\nIf so, then additionally check whether the dual gap is smaller than `tol` times\n:math:`||y||_2^2 / n_{ ext{samples}}`.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.Lasso(alpha=0.1)\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])\nLasso(alpha=0.1)\n>>> print(clf.coef_)\n[0.85 0. ]\n>>> print(clf.intercept_)\n0.15..." }, { @@ -24800,7 +24800,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.linear_model"], - "description": "Lasso linear model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe best model is selected by cross-validation.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.", + "description": "Lasso linear model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe best model is selected by cross-validation.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.", "docstring": "Lasso linear model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe best model is selected by cross-validation.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If ``None`` alphas are set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n .. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2.\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. 
The Gram\n matrix can also be passed as argument.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\npositive : bool, default=False\n If positive, restrict regression coefficients to be positive.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nalpha_ : float\n The amount of penalization chosen by cross validation.\n\ncoef_ : ndarray of shape (n_features,) or (n_targets, n_features)\n Parameter vector (w in the cost function formula).\n\nintercept_ : float or ndarray of shape (n_targets,)\n Independent term in decision function.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds)\n Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,)\n The grid of alphas used for fitting.\n\ndual_gap_ : float or ndarray of shape (n_targets,)\n The dual gap at the end of the optimization for the optimal alpha\n (``alpha_``).\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nlars_path : Compute Least Angle Regression or Lasso path using LARS\n algorithm.\nlasso_path : Compute Lasso path with coordinate descent.\nLasso : The Lasso is a linear model that estimates sparse coefficients.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. 
Lars.\nLassoCV : Lasso linear model with iterative fitting along a regularization\n path.\nLassoLarsCV : Cross-validated Lasso using the LARS algorithm.\n\nNotes\n-----\nIn `fit`, once the best parameter `alpha` is found through\ncross-validation, the model is fit again using the entire training set.\n\nTo avoid unnecessary memory duplication the `X` argument of the `fit`\nmethod should be directly passed as a Fortran-contiguous numpy array.\n\n For an example, see\n :ref:`examples/linear_model/plot_lasso_model_selection.py\n `.\n\nExamples\n--------\n>>> from sklearn.linear_model import LassoCV\n>>> from sklearn.datasets import make_regression\n>>> X, y = make_regression(noise=4, random_state=0)\n>>> reg = LassoCV(cv=5, random_state=0).fit(X, y)\n>>> reg.score(X, y)\n0.9993...\n>>> reg.predict(X[:1,])\narray([-78.4951...])" }, { @@ -24835,7 +24835,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.linear_model"], - "description": "Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n(1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n+ alpha * l1_ratio * ||W||_21\n+ 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\ni.e. the sum of norms of each row.\n\nRead more in the :ref:`User Guide `.", + "description": "Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\ni.e. the sum of norms of each row.\n\nRead more in the :ref:`User Guide `.", "docstring": "Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\ni.e. the sum of norms of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\nl1_ratio : float, default=0.5\n The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n is an L2 penalty.\n For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n .. 
deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_targets,)\n Independent term in decision function.\n\ncoef_ : ndarray of shape (n_targets, n_features)\n Parameter vector (W in the cost function formula). If a 1D y is\n passed in at fit (non multi-task usage), ``coef_`` is then a 1D array.\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\ndual_gap_ : float\n The dual gaps at the end of the optimization.\n\neps_ : float\n The tolerance scaled scaled by the variance of the target `y`.\n\nsparse_coef_ : sparse matrix of shape (n_features,) or (n_targets, n_features)\n Sparse representation of the `coef_`.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. 
versionadded:: 1.0\n\nSee Also\n--------\nMultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in\n cross-validation.\nElasticNet : Linear regression with combined L1 and L2 priors as regularizer.\nMultiTaskLasso : Multi-task L1/L2 Lasso with built-in cross-validation.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskElasticNet(alpha=0.1)\n>>> clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])\nMultiTaskElasticNet(alpha=0.1)\n>>> print(clf.coef_)\n[[0.45663524 0.45612256]\n [0.45663524 0.45612256]]\n>>> print(clf.intercept_)\n[0.0872422 0.0872422]" }, { @@ -24853,7 +24853,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.linear_model"], - "description": "Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n(1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n+ alpha * l1_ratio * ||W||_21\n+ 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15", + "description": "Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15", "docstring": "Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15\n\nParameters\n----------\nl1_ratio : float or list of float, default=0.5\n The ElasticNet mixing parameter, with 0 < l1_ratio <= 1.\n For l1_ratio = 1 the penalty is an L1/L2 penalty. For l1_ratio = 0 it\n is an L2 penalty.\n For ``0 < l1_ratio < 1``, the penalty is a combination of L1/L2 and L2.\n This parameter can be a list, in which case the different\n values are tested by cross-validation and the one giving the best\n prediction score is used. Note that a good choice of list of\n values for l1_ratio is often to put more values close to 1\n (i.e. Lasso) and less close to 0 (i.e. Ridge), as in ``[.1, .5, .7,\n .9, .95, .99, 1]``.\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : array-like, default=None\n List of alphas where to compute the models.\n If not provided, set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. 
data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n .. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nverbose : bool or int, default=0\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation. Note that this is\n used only if multiple values for l1_ratio are given.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_targets,)\n Independent term in decision function.\n\ncoef_ : ndarray of shape (n_targets, n_features)\n Parameter vector (W in the cost function formula).\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nalpha_ : float\n The amount of penalization chosen by cross validation.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds) or (n_l1_ratio, n_alphas, n_folds)\n Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,) or (n_l1_ratio, n_alphas)\n The grid of alphas used for fitting, for each l1_ratio.\n\nl1_ratio_ : float\n Best l1_ratio obtained by cross-validation.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\ndual_gap_ : float\n The dual gap at the end of the optimization for the optimal alpha.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. 
Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nMultiTaskElasticNet : Multi-task L1/L2 ElasticNet with built-in cross-validation.\nElasticNetCV : Elastic net model with best model selection by\n cross-validation.\nMultiTaskLassoCV : Multi-task Lasso model trained with L1/L2\n mixed-norm as regularizer.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nIn `fit`, once the best parameters `l1_ratio` and `alpha` are found through\ncross-validation, the model is fit again using the entire training set.\n\nTo avoid unnecessary memory duplication the `X` and `y` arguments of the\n`fit` method should be directly passed as Fortran-contiguous numpy arrays.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskElasticNetCV(cv=3)\n>>> clf.fit([[0,0], [1, 1], [2, 2]],\n... [[0, 0], [1, 1], [2, 2]])\nMultiTaskElasticNetCV(cv=3)\n>>> print(clf.coef_)\n[[0.52875032 0.46958558]\n [0.52875032 0.46958558]]\n>>> print(clf.intercept_)\n[0.00166409 0.00166409]" }, { @@ -24865,7 +24865,7 @@ "methods": ["sklearn/sklearn.linear_model._coordinate_descent/MultiTaskLasso/__init__"], "is_public": true, "reexported_by": ["sklearn/sklearn.linear_model"], - "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.", + "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.", "docstring": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nalpha : float, default=1.0\n Constant that multiplies the L1/L2 term. Defaults to 1.0.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n .. 
deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\nwarm_start : bool, default=False\n When set to ``True``, reuse the solution of the previous call to fit as\n initialization, otherwise, just erase the previous solution.\n See :term:`the Glossary `.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\ncoef_ : ndarray of shape (n_targets, n_features)\n Parameter vector (W in the cost function formula).\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nintercept_ : ndarray of shape (n_targets,)\n Independent term in decision function.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance.\n\ndual_gap_ : ndarray of shape (n_alphas,)\n The dual gaps at the end of the optimization for each alpha.\n\neps_ : float\n The tolerance scaled scaled by the variance of the target `y`.\n\nsparse_coef_ : sparse matrix of shape (n_features,) or (n_targets, n_features)\n Sparse representation of the `coef_`.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nLasso: Linear Model trained with L1 prior as regularizer (aka the Lasso).\nMultiTaskLasso: Multi-task L1/L2 Lasso with built-in cross-validation.\nMultiTaskElasticNet: Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nTo avoid unnecessary memory duplication the X and y arguments of the fit\nmethod should be directly passed as Fortran-contiguous numpy arrays.\n\nExamples\n--------\n>>> from sklearn import linear_model\n>>> clf = linear_model.MultiTaskLasso(alpha=0.1)\n>>> clf.fit([[0, 1], [1, 2], [2, 4]], [[0, 0], [1, 1], [2, 3]])\nMultiTaskLasso(alpha=0.1)\n>>> print(clf.coef_)\n[[0. 0.60809415]\n[0. 0.94592424]]\n>>> print(clf.intercept_)\n[-0.41888636 -0.87382323]" }, { @@ -24883,7 +24883,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.linear_model"], - "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskLasso is::\n\n(1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\nWhere::\n\n||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.15", + "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskLasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15", "docstring": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskLasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15\n\nParameters\n----------\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : array-like, default=None\n List of alphas where to compute the models.\n If not provided, set automatically.\n\nfit_intercept : bool, default=True\n Whether to calculate the intercept for this model. If set\n to false, no intercept will be used in calculations\n (i.e. data is expected to be centered).\n\nnormalize : bool, default=False\n This parameter is ignored when ``fit_intercept`` is set to False.\n If True, the regressors X will be normalized before regression by\n subtracting the mean and dividing by the l2-norm.\n If you wish to standardize, please use\n :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\n on an estimator with ``normalize=False``.\n\n .. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2.\n\nmax_iter : int, default=1000\n The maximum number of iterations.\n\ntol : float, default=1e-4\n The tolerance for the optimization: if the updates are\n smaller than ``tol``, the optimization code checks the\n dual gap for optimality and continues until it is smaller\n than ``tol``.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncv : int, cross-validation generator or iterable, default=None\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - None, to use the default 5-fold cross-validation,\n - int, to specify the number of folds.\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For int/None inputs, :class:`KFold` is used.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nn_jobs : int, default=None\n Number of CPUs to use during the cross validation. Note that this is\n used only if multiple values for l1_ratio are given.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nrandom_state : int, RandomState instance, default=None\n The seed of the pseudo random number generator that selects a random\n feature to update. 
Used when ``selection`` == 'random'.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nselection : {'cyclic', 'random'}, default='cyclic'\n If set to 'random', a random coefficient is updated every iteration\n rather than looping over features sequentially by default. This\n (setting to 'random') often leads to significantly faster convergence\n especially when tol is higher than 1e-4.\n\nAttributes\n----------\nintercept_ : ndarray of shape (n_targets,)\n Independent term in decision function.\n\ncoef_ : ndarray of shape (n_targets, n_features)\n Parameter vector (W in the cost function formula).\n Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\nalpha_ : float\n The amount of penalization chosen by cross validation.\n\nmse_path_ : ndarray of shape (n_alphas, n_folds)\n Mean square error for the test set on each fold, varying alpha.\n\nalphas_ : ndarray of shape (n_alphas,)\n The grid of alphas used for fitting.\n\nn_iter_ : int\n Number of iterations run by the coordinate descent solver to reach\n the specified tolerance for the optimal alpha.\n\ndual_gap_ : float\n The dual gap at the end of the optimization for the optimal alpha.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nMultiTaskElasticNet : Multi-task ElasticNet model trained with L1/L2\n mixed-norm as regularizer.\nElasticNetCV : Elastic net model with best model selection by\n cross-validation.\nMultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in\n cross-validation.\n\nNotes\n-----\nThe algorithm used to fit the model is coordinate descent.\n\nIn `fit`, once the best parameter `alpha` is found through\ncross-validation, the model is fit again using the entire training set.\n\nTo avoid unnecessary memory duplication the `X` and `y` arguments of the\n`fit` method should be directly passed as Fortran-contiguous numpy arrays.\n\nExamples\n--------\n>>> from sklearn.linear_model import MultiTaskLassoCV\n>>> from sklearn.datasets import make_regression\n>>> from sklearn.metrics import r2_score\n>>> X, y = make_regression(n_targets=2, noise=4, random_state=0)\n>>> reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)\n>>> r2_score(y, reg.predict(X))\n0.9994...\n>>> reg.alpha_\n0.5713...\n>>> reg.predict(X[:1,])\narray([[153.7971..., 94.9015...]])" }, { @@ -24949,7 +24949,7 @@ ], "is_public": false, "reexported_by": [], - "description": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and\npredicting the mean of the target y as y_pred=h(X*w) with coefficients w.\nTherefore, the fit minimizes the following objective function with L2 priors as\nregularizer::\n\n1/(2*sum(s_i)) * sum(s_i * deviance(y_i, h(x_i*w)) + 1/2 * alpha * ||w||_2^2\n\nwith inverse link function h, s=sample_weight and per observation (unit) deviance\ndeviance(y_i, h(x_i*w)). 
Note that for an EDM, 1/2 * deviance is the negative\nlog-likelihood up to a constant (in w) term.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nInstead of implementing the EDM family and a link function separately, we directly\nuse the loss functions `from sklearn._loss` which have the link functions included\nin them for performance reasons. We pick the loss functions that implement\n(1/2 times) EDM deviances.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23", + "description": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and\npredicting the mean of the target y as y_pred=h(X*w) with coefficients w.\nTherefore, the fit minimizes the following objective function with L2 priors as\nregularizer::\n\n 1/(2*sum(s_i)) * sum(s_i * deviance(y_i, h(x_i*w)) + 1/2 * alpha * ||w||_2^2\n\nwith inverse link function h, s=sample_weight and per observation (unit) deviance\ndeviance(y_i, h(x_i*w)). Note that for an EDM, 1/2 * deviance is the negative\nlog-likelihood up to a constant (in w) term.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nInstead of implementing the EDM family and a link function separately, we directly\nuse the loss functions `from sklearn._loss` which have the link functions included\nin them for performance reasons. We pick the loss functions that implement\n(1/2 times) EDM deviances.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23", "docstring": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and\npredicting the mean of the target y as y_pred=h(X*w) with coefficients w.\nTherefore, the fit minimizes the following objective function with L2 priors as\nregularizer::\n\n 1/(2*sum(s_i)) * sum(s_i * deviance(y_i, h(x_i*w)) + 1/2 * alpha * ||w||_2^2\n\nwith inverse link function h, s=sample_weight and per observation (unit) deviance\ndeviance(y_i, h(x_i*w)). Note that for an EDM, 1/2 * deviance is the negative\nlog-likelihood up to a constant (in w) term.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nInstead of implementing the EDM family and a link function separately, we directly\nuse the loss functions `from sklearn._loss` which have the link functions included\nin them for performance reasons. We pick the loss functions that implement\n(1/2 times) EDM deviances.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23\n\nParameters\n----------\nalpha : float, default=1\n Constant that multiplies the penalty term and thus determines the\n regularization strength. ``alpha = 0`` is equivalent to unpenalized\n GLMs. In this case, the design matrix `X` must have full column rank\n (no collinearities).\n Values must be in the range `[0.0, inf)`.\n\nfit_intercept : bool, default=True\n Specifies if a constant (a.k.a. bias or intercept) should be\n added to the linear predictor (X @ coef + intercept).\n\nsolver : 'lbfgs', default='lbfgs'\n Algorithm to use in the optimization problem:\n\n 'lbfgs'\n Calls scipy's L-BFGS-B optimizer.\n\nmax_iter : int, default=100\n The maximal number of iterations for the solver.\n Values must be in the range `[1, inf)`.\n\ntol : float, default=1e-4\n Stopping criterion. 
For the lbfgs solver,\n the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``\n where ``g_j`` is the j-th component of the gradient (derivative) of\n the objective function.\n Values must be in the range `(0.0, inf)`.\n\nwarm_start : bool, default=False\n If set to ``True``, reuse the solution of the previous call to ``fit``\n as initialization for ``coef_`` and ``intercept_``.\n\nverbose : int, default=0\n For the lbfgs solver set verbose to any positive number for verbosity.\n Values must be in the range `[0, inf)`.\n\nAttributes\n----------\ncoef_ : array of shape (n_features,)\n Estimated coefficients for the linear predictor (`X @ coef_ +\n intercept_`) in the GLM.\n\nintercept_ : float\n Intercept (a.k.a. bias) added to linear predictor.\n\nn_iter_ : int\n Actual number of iterations used in the solver.\n\n_base_loss : BaseLoss, default=HalfSquaredError()\n This is set during fit via `self._get_loss()`.\n A `_base_loss` contains a specific loss function as well as the link\n function. The loss to be minimized specifies the distributional assumption of\n the GLM, i.e. the distribution from the EDM. Here are some examples:\n\n ======================= ======== ==========================\n _base_loss Link Target Domain\n ======================= ======== ==========================\n HalfSquaredError identity y any real number\n HalfPoissonLoss log 0 <= y\n HalfGammaLoss log 0 < y\n HalfTweedieLoss log dependend on tweedie power\n HalfTweedieLossIdentity identity dependend on tweedie power\n ======================= ======== ==========================\n\n The link function of the GLM, i.e. mapping from linear predictor\n `X @ coeff + intercept` to prediction `y_pred`. For instance, with a log link,\n we have `y_pred = exp(X @ coeff + intercept)`." }, { @@ -25057,7 +25057,7 @@ ], "is_public": false, "reexported_by": [], - "description": "General class for loss functions with raw_prediction = X @ coef + intercept.\n\nNote that raw_prediction is also known as linear predictor.\n\nThe loss is the sum of per sample losses and includes a term for L2\nregularization::\n\nloss = sum_i s_i loss(y_i, X_i @ coef + intercept)\n+ 1/2 * l2_reg_strength * ||coef||_2^2\n\nwith sample weights s_i=1 if sample_weight=None.\n\nGradient and hessian, for simplicity without intercept, are::\n\ngradient = X.T @ loss.gradient + l2_reg_strength * coef\nhessian = X.T @ diag(loss.hessian) @ X + l2_reg_strength * identity\n\nConventions:\nif fit_intercept:\nn_dof = n_features + 1\nelse:\nn_dof = n_features\n\nif base_loss.is_multiclass:\ncoef.shape = (n_classes, n_dof) or ravelled (n_classes * n_dof,)\nelse:\ncoef.shape = (n_dof,)\n\nThe intercept term is at the end of the coef array:\nif base_loss.is_multiclass:\nif coef.shape (n_classes, n_dof):\nintercept = coef[:, -1]\nif coef.shape (n_classes * n_dof,)\nintercept = coef[n_features::n_dof] = coef[(n_dof-1)::n_dof]\nintercept.shape = (n_classes,)\nelse:\nintercept = coef[-1]\n\nNote: If coef has shape (n_classes * n_dof,), the 2d-array can be reconstructed as\n\ncoef.reshape((n_classes, -1), order=\"F\")\n\nThe option order=\"F\" makes coef[:, i] contiguous. 
This, in turn, makes the\ncoefficients without intercept, coef[:, :-1], contiguous and speeds up\nmatrix-vector computations.\n\nNote: If the average loss per sample is wanted instead of the sum of the loss per\nsample, one can simply use a rescaled sample_weight such that\nsum(sample_weight) = 1.", + "description": "General class for loss functions with raw_prediction = X @ coef + intercept.\n\nNote that raw_prediction is also known as linear predictor.\n\nThe loss is the sum of per sample losses and includes a term for L2\nregularization::\n\n loss = sum_i s_i loss(y_i, X_i @ coef + intercept)\n + 1/2 * l2_reg_strength * ||coef||_2^2\n\nwith sample weights s_i=1 if sample_weight=None.\n\nGradient and hessian, for simplicity without intercept, are::\n\n gradient = X.T @ loss.gradient + l2_reg_strength * coef\n hessian = X.T @ diag(loss.hessian) @ X + l2_reg_strength * identity\n\nConventions:\n if fit_intercept:\n n_dof = n_features + 1\n else:\n n_dof = n_features\n\n if base_loss.is_multiclass:\n coef.shape = (n_classes, n_dof) or ravelled (n_classes * n_dof,)\n else:\n coef.shape = (n_dof,)\n\n The intercept term is at the end of the coef array:\n if base_loss.is_multiclass:\n if coef.shape (n_classes, n_dof):\n intercept = coef[:, -1]\n if coef.shape (n_classes * n_dof,)\n intercept = coef[n_features::n_dof] = coef[(n_dof-1)::n_dof]\n intercept.shape = (n_classes,)\n else:\n intercept = coef[-1]\n\nNote: If coef has shape (n_classes * n_dof,), the 2d-array can be reconstructed as\n\n coef.reshape((n_classes, -1), order=\"F\")\n\nThe option order=\"F\" makes coef[:, i] contiguous. This, in turn, makes the\ncoefficients without intercept, coef[:, :-1], contiguous and speeds up\nmatrix-vector computations.\n\nNote: If the average loss per sample is wanted instead of the sum of the loss per\nsample, one can simply use a rescaled sample_weight such that\nsum(sample_weight) = 1.", "docstring": "General class for loss functions with raw_prediction = X @ coef + intercept.\n\nNote that raw_prediction is also known as linear predictor.\n\nThe loss is the sum of per sample losses and includes a term for L2\nregularization::\n\n loss = sum_i s_i loss(y_i, X_i @ coef + intercept)\n + 1/2 * l2_reg_strength * ||coef||_2^2\n\nwith sample weights s_i=1 if sample_weight=None.\n\nGradient and hessian, for simplicity without intercept, are::\n\n gradient = X.T @ loss.gradient + l2_reg_strength * coef\n hessian = X.T @ diag(loss.hessian) @ X + l2_reg_strength * identity\n\nConventions:\n if fit_intercept:\n n_dof = n_features + 1\n else:\n n_dof = n_features\n\n if base_loss.is_multiclass:\n coef.shape = (n_classes, n_dof) or ravelled (n_classes * n_dof,)\n else:\n coef.shape = (n_dof,)\n\n The intercept term is at the end of the coef array:\n if base_loss.is_multiclass:\n if coef.shape (n_classes, n_dof):\n intercept = coef[:, -1]\n if coef.shape (n_classes * n_dof,)\n intercept = coef[n_features::n_dof] = coef[(n_dof-1)::n_dof]\n intercept.shape = (n_classes,)\n else:\n intercept = coef[-1]\n\nNote: If coef has shape (n_classes * n_dof,), the 2d-array can be reconstructed as\n\n coef.reshape((n_classes, -1), order=\"F\")\n\nThe option order=\"F\" makes coef[:, i] contiguous. 
This, in turn, makes the\ncoefficients without intercept, coef[:, :-1], contiguous and speeds up\nmatrix-vector computations.\n\nNote: If the average loss per sample is wanted instead of the sum of the loss per\nsample, one can simply use a rescaled sample_weight such that\nsum(sample_weight) = 1.\n\nParameters\n----------\nbase_loss : instance of class BaseLoss from sklearn._loss.\nfit_intercept : bool" }, { @@ -26022,7 +26022,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.model_selection"], - "description": "Search over specified parameter values with successive halving.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using\nmore and more resources.\n\nRead more in the :ref:`User guide `.\n\n.. note::\n\nThis estimator is still **experimental** for now: the predictions\nand the API might change without any deprecation cycle. To use it,\nyou need to explicitly import ``enable_halving_search_cv``::\n\n>>> # explicitly require this experimental feature\n>>> from sklearn.experimental import enable_halving_search_cv # noqa\n>>> # now you can import normally from model_selection\n>>> from sklearn.model_selection import HalvingGridSearchCV", + "description": "Search over specified parameter values with successive halving.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using\nmore and more resources.\n\nRead more in the :ref:`User guide `.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingGridSearchCV", "docstring": "Search over specified parameter values with successive halving.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using\nmore and more resources.\n\nRead more in the :ref:`User guide `.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingGridSearchCV\n\nParameters\n----------\nestimator : estimator object\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparam_grid : dict or list of dictionaries\n Dictionary with parameters names (string) as keys and lists of\n parameter settings to try as values, or a list of such\n dictionaries, in which case the grids spanned by each dictionary\n in the list are explored. This enables searching over any sequence\n of parameter settings.\n\nfactor : int or float, default=3\n The 'halving' parameter, which determines the proportion of candidates\n that are selected for each subsequent iteration. 
For example,\n ``factor=3`` means that only one third of the candidates are selected.\n\nresource : ``'n_samples'`` or str, default='n_samples'\n Defines the resource that increases with each iteration. By default,\n the resource is the number of samples. It can also be set to any\n parameter of the base estimator that accepts positive integer\n values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n boosting estimator. In this case ``max_resources`` cannot be 'auto'\n and must be set explicitly.\n\nmax_resources : int, default='auto'\n The maximum amount of resource that any candidate is allowed to use\n for a given iteration. By default, this is set to ``n_samples`` when\n ``resource='n_samples'`` (default), else an error is raised.\n\nmin_resources : {'exhaust', 'smallest'} or int, default='exhaust'\n The minimum amount of resource that any candidate is allowed to use\n for a given iteration. Equivalently, this defines the amount of\n resources `r0` that are allocated for each candidate at the first\n iteration.\n\n - 'smallest' is a heuristic that sets `r0` to a small value:\n\n - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n problem\n - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n classification problem\n - ``1`` when ``resource != 'n_samples'``\n\n - 'exhaust' will set `r0` such that the **last** iteration uses as\n much resources as possible. Namely, the last iteration will use the\n highest value smaller than ``max_resources`` that is a multiple of\n both ``min_resources`` and ``factor``. In general, using 'exhaust'\n leads to a more accurate estimator, but is slightly more time\n consuming.\n\n Note that the amount of resources used at each iteration is always a\n multiple of ``min_resources``.\n\naggressive_elimination : bool, default=False\n This is only relevant in cases where there isn't enough resources to\n reduce the remaining candidates to at most `factor` after the last\n iteration. If ``True``, then the search process will 'replay' the\n first iteration for as long as needed until the number of candidates\n is small enough. This is ``False`` by default, which means that the\n last iteration may evaluate more than ``factor`` candidates. See\n :ref:`aggressive_elimination` for more details.\n\ncv : int, cross-validation generator or iterable, default=5\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. note::\n Due to implementation details, the folds produced by `cv` must be\n the same across multiple calls to `cv.split()`. 
For\n built-in `scikit-learn` iterators, this can be achieved by\n deactivating shuffling (`shuffle=False`), or by setting the\n `cv`'s `random_state` parameter to an integer.\n\nscoring : str, callable, or None, default=None\n A single string (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n If None, the estimator's score method is used.\n\nrefit : bool, default=True\n If True, refit an estimator using the best found parameters on the\n whole dataset.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``HalvingGridSearchCV`` instance.\n\nerror_score : 'raise' or numeric\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error. Default is ``np.nan``.\n\nreturn_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for subsampling the dataset\n when `resources != 'n_samples'`. Ignored otherwise.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int or None, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int\n Controls the verbosity: the higher, the more messages.\n\nAttributes\n----------\nn_resources_ : list of int\n The amount of resources used at each iteration.\n\nn_candidates_ : list of int\n The number of candidate parameters that were evaluated at each\n iteration.\n\nn_remaining_candidates_ : int\n The number of candidate parameters that are left after the last\n iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\nmax_resources_ : int\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. Note that since the number of resources used\n at each iteration must be a multiple of ``min_resources_``, the\n actual number of resources used at the last iteration may be smaller\n than ``max_resources_``.\n\nmin_resources_ : int\n The amount of resources that are allocated for each candidate at the\n first iteration.\n\nn_iterations_ : int\n The actual number of iterations that were run. This is equal to\n ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n Else, this is equal to ``min(n_possible_iterations_,\n n_required_iterations_)``.\n\nn_possible_iterations_ : int\n The number of iterations that are possible starting with\n ``min_resources_`` resources and without exceeding\n ``max_resources_``.\n\nn_required_iterations_ : int\n The number of iterations that are required to end up with less than\n ``factor`` candidates at the last iteration, starting with\n ``min_resources_`` resources. 
This will be smaller than\n ``n_possible_iterations_`` when there isn't enough resources.\n\ncv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``. It contains lots of information\n for analysing the results of a search.\n Please refer to the :ref:`User guide`\n for details.\n\nbest_estimator_ : estimator or dict\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. Not available if ``refit=False``.\n\nbest_score_ : float\n Mean cross-validated score of the best_estimator.\n\nbest_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\nbest_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\nscorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\nn_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\nmultimetric_ : bool\n Whether or not the scorers compute several metrics.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels. This is present only if ``refit`` is specified and\n the underlying estimator is a classifier.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`. Only defined if\n `best_estimator_` is defined (see the documentation for the `refit`\n parameter for more details) and that `best_estimator_` exposes\n `n_features_in_` when fit.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Only defined if\n `best_estimator_` is defined (see the documentation for the `refit`\n parameter for more details) and that `best_estimator_` exposes\n `feature_names_in_` when fit.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\n:class:`HalvingRandomSearchCV`:\n Random search over a set of parameters using successive halving.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.experimental import enable_halving_search_cv # noqa\n>>> from sklearn.model_selection import HalvingGridSearchCV\n...\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = RandomForestClassifier(random_state=0)\n...\n>>> param_grid = {\"max_depth\": [3, None],\n... \"min_samples_split\": [5, 10]}\n>>> search = HalvingGridSearchCV(clf, param_grid, resource='n_estimators',\n... max_resources=10,\n... 
random_state=0).fit(X, y)\n>>> search.best_params_ # doctest: +SKIP\n{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}" }, { @@ -26037,7 +26037,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.model_selection"], - "description": "Randomized search on hyper parameters.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using more\nand more resources.\n\nThe candidates are sampled at random from the parameter space and the\nnumber of sampled candidates is determined by ``n_candidates``.\n\nRead more in the :ref:`User guide`.\n\n.. note::\n\nThis estimator is still **experimental** for now: the predictions\nand the API might change without any deprecation cycle. To use it,\nyou need to explicitly import ``enable_halving_search_cv``::\n\n>>> # explicitly require this experimental feature\n>>> from sklearn.experimental import enable_halving_search_cv # noqa\n>>> # now you can import normally from model_selection\n>>> from sklearn.model_selection import HalvingRandomSearchCV", + "description": "Randomized search on hyper parameters.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using more\nand more resources.\n\nThe candidates are sampled at random from the parameter space and the\nnumber of sampled candidates is determined by ``n_candidates``.\n\nRead more in the :ref:`User guide`.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingRandomSearchCV", "docstring": "Randomized search on hyper parameters.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using more\nand more resources.\n\nThe candidates are sampled at random from the parameter space and the\nnumber of sampled candidates is determined by ``n_candidates``.\n\nRead more in the :ref:`User guide`.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingRandomSearchCV\n\nParameters\n----------\nestimator : estimator object\n This is assumed to implement the scikit-learn estimator interface.\n Either estimator needs to provide a ``score`` function,\n or ``scoring`` must be passed.\n\nparam_distributions : dict\n Dictionary with parameters names (string) as keys and distributions\n or lists of parameters to try. Distributions must provide a ``rvs``\n method for sampling (such as those from scipy.stats.distributions).\n If a list is given, it is sampled uniformly.\n\nn_candidates : int, default='exhaust'\n The number of candidate parameters to sample, at the first\n iteration. 
Using 'exhaust' will sample enough candidates so that the\n last iteration uses as many resources as possible, based on\n `min_resources`, `max_resources` and `factor`. In this case,\n `min_resources` cannot be 'exhaust'.\n\nfactor : int or float, default=3\n The 'halving' parameter, which determines the proportion of candidates\n that are selected for each subsequent iteration. For example,\n ``factor=3`` means that only one third of the candidates are selected.\n\nresource : ``'n_samples'`` or str, default='n_samples'\n Defines the resource that increases with each iteration. By default,\n the resource is the number of samples. It can also be set to any\n parameter of the base estimator that accepts positive integer\n values, e.g. 'n_iterations' or 'n_estimators' for a gradient\n boosting estimator. In this case ``max_resources`` cannot be 'auto'\n and must be set explicitly.\n\nmax_resources : int, default='auto'\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. By default, this is set ``n_samples`` when\n ``resource='n_samples'`` (default), else an error is raised.\n\nmin_resources : {'exhaust', 'smallest'} or int, default='smallest'\n The minimum amount of resource that any candidate is allowed to use\n for a given iteration. Equivalently, this defines the amount of\n resources `r0` that are allocated for each candidate at the first\n iteration.\n\n - 'smallest' is a heuristic that sets `r0` to a small value:\n\n - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n problem\n - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n classification problem\n - ``1`` when ``resource != 'n_samples'``\n\n - 'exhaust' will set `r0` such that the **last** iteration uses as\n much resources as possible. Namely, the last iteration will use the\n highest value smaller than ``max_resources`` that is a multiple of\n both ``min_resources`` and ``factor``. In general, using 'exhaust'\n leads to a more accurate estimator, but is slightly more time\n consuming. 'exhaust' isn't available when `n_candidates='exhaust'`.\n\n Note that the amount of resources used at each iteration is always a\n multiple of ``min_resources``.\n\naggressive_elimination : bool, default=False\n This is only relevant in cases where there isn't enough resources to\n reduce the remaining candidates to at most `factor` after the last\n iteration. If ``True``, then the search process will 'replay' the\n first iteration for as long as needed until the number of candidates\n is small enough. This is ``False`` by default, which means that the\n last iteration may evaluate more than ``factor`` candidates. See\n :ref:`aggressive_elimination` for more details.\n\ncv : int, cross-validation generator or an iterable, default=5\n Determines the cross-validation splitting strategy.\n Possible inputs for cv are:\n\n - integer, to specify the number of folds in a `(Stratified)KFold`,\n - :term:`CV splitter`,\n - An iterable yielding (train, test) splits as arrays of indices.\n\n For integer/None inputs, if the estimator is a classifier and ``y`` is\n either binary or multiclass, :class:`StratifiedKFold` is used. In all\n other cases, :class:`KFold` is used. These splitters are instantiated\n with `shuffle=False` so the splits will be the same across calls.\n\n Refer :ref:`User Guide ` for the various\n cross-validation strategies that can be used here.\n\n .. 
note::\n Due to implementation details, the folds produced by `cv` must be\n the same across multiple calls to `cv.split()`. For\n built-in `scikit-learn` iterators, this can be achieved by\n deactivating shuffling (`shuffle=False`), or by setting the\n `cv`'s `random_state` parameter to an integer.\n\nscoring : str, callable, or None, default=None\n A single string (see :ref:`scoring_parameter`) or a callable\n (see :ref:`scoring`) to evaluate the predictions on the test set.\n If None, the estimator's score method is used.\n\nrefit : bool, default=True\n If True, refit an estimator using the best found parameters on the\n whole dataset.\n\n The refitted estimator is made available at the ``best_estimator_``\n attribute and permits using ``predict`` directly on this\n ``HalvingRandomSearchCV`` instance.\n\nerror_score : 'raise' or numeric\n Value to assign to the score if an error occurs in estimator fitting.\n If set to 'raise', the error is raised. If a numeric value is given,\n FitFailedWarning is raised. This parameter does not affect the refit\n step, which will always raise the error. Default is ``np.nan``.\n\nreturn_train_score : bool, default=False\n If ``False``, the ``cv_results_`` attribute will not include training\n scores.\n Computing training scores is used to get insights on how different\n parameter settings impact the overfitting/underfitting trade-off.\n However computing the scores on the training set can be computationally\n expensive and is not strictly required to select the parameters that\n yield the best generalization performance.\n\nrandom_state : int, RandomState instance or None, default=None\n Pseudo random number generator state used for subsampling the dataset\n when `resources != 'n_samples'`. Also used for random uniform\n sampling from lists of possible values instead of scipy.stats\n distributions.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nn_jobs : int or None, default=None\n Number of jobs to run in parallel.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int\n Controls the verbosity: the higher, the more messages.\n\nAttributes\n----------\nn_resources_ : list of int\n The amount of resources used at each iteration.\n\nn_candidates_ : list of int\n The number of candidate parameters that were evaluated at each\n iteration.\n\nn_remaining_candidates_ : int\n The number of candidate parameters that are left after the last\n iteration. It corresponds to `ceil(n_candidates[-1] / factor)`\n\nmax_resources_ : int\n The maximum number of resources that any candidate is allowed to use\n for a given iteration. Note that since the number of resources used at\n each iteration must be a multiple of ``min_resources_``, the actual\n number of resources used at the last iteration may be smaller than\n ``max_resources_``.\n\nmin_resources_ : int\n The amount of resources that are allocated for each candidate at the\n first iteration.\n\nn_iterations_ : int\n The actual number of iterations that were run. 
This is equal to\n ``n_required_iterations_`` if ``aggressive_elimination`` is ``True``.\n Else, this is equal to ``min(n_possible_iterations_,\n n_required_iterations_)``.\n\nn_possible_iterations_ : int\n The number of iterations that are possible starting with\n ``min_resources_`` resources and without exceeding\n ``max_resources_``.\n\nn_required_iterations_ : int\n The number of iterations that are required to end up with less than\n ``factor`` candidates at the last iteration, starting with\n ``min_resources_`` resources. This will be smaller than\n ``n_possible_iterations_`` when there isn't enough resources.\n\ncv_results_ : dict of numpy (masked) ndarrays\n A dict with keys as column headers and values as columns, that can be\n imported into a pandas ``DataFrame``. It contains lots of information\n for analysing the results of a search.\n Please refer to the :ref:`User guide`\n for details.\n\nbest_estimator_ : estimator or dict\n Estimator that was chosen by the search, i.e. estimator\n which gave highest score (or smallest loss if specified)\n on the left out data. Not available if ``refit=False``.\n\nbest_score_ : float\n Mean cross-validated score of the best_estimator.\n\nbest_params_ : dict\n Parameter setting that gave the best results on the hold out data.\n\nbest_index_ : int\n The index (of the ``cv_results_`` arrays) which corresponds to the best\n candidate parameter setting.\n\n The dict at ``search.cv_results_['params'][search.best_index_]`` gives\n the parameter setting for the best model, that gives the highest\n mean score (``search.best_score_``).\n\nscorer_ : function or a dict\n Scorer function used on the held out data to choose the best\n parameters for the model.\n\nn_splits_ : int\n The number of cross-validation splits (folds/iterations).\n\nrefit_time_ : float\n Seconds used for refitting the best model on the whole dataset.\n\n This is present only if ``refit`` is not False.\n\nmultimetric_ : bool\n Whether or not the scorers compute several metrics.\n\nclasses_ : ndarray of shape (n_classes,)\n The classes labels. This is present only if ``refit`` is specified and\n the underlying estimator is a classifier.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`. Only defined if\n `best_estimator_` is defined (see the documentation for the `refit`\n parameter for more details) and that `best_estimator_` exposes\n `n_features_in_` when fit.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Only defined if\n `best_estimator_` is defined (see the documentation for the `refit`\n parameter for more details) and that `best_estimator_` exposes\n `feature_names_in_` when fit.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\n:class:`HalvingGridSearchCV`:\n Search over a grid of parameters using successive halving.\n\nNotes\n-----\nThe parameters selected are those that maximize the score of the held-out\ndata, according to the scoring parameter.\n\nExamples\n--------\n\n>>> from sklearn.datasets import load_iris\n>>> from sklearn.ensemble import RandomForestClassifier\n>>> from sklearn.experimental import enable_halving_search_cv # noqa\n>>> from sklearn.model_selection import HalvingRandomSearchCV\n>>> from scipy.stats import randint\n>>> import numpy as np\n...\n>>> X, y = load_iris(return_X_y=True)\n>>> clf = RandomForestClassifier(random_state=0)\n>>> np.random.seed(0)\n...\n>>> param_distributions = {\"max_depth\": [3, None],\n... 
\"min_samples_split\": randint(2, 11)}\n>>> search = HalvingRandomSearchCV(clf, param_distributions,\n... resource='n_estimators',\n... max_resources=10,\n... random_state=0).fit(X, y)\n>>> search.best_params_ # doctest: +SKIP\n{'max_depth': None, 'min_samples_split': 10, 'n_estimators': 9}" }, { @@ -26625,7 +26625,7 @@ ], "is_public": true, "reexported_by": [], - "description": "Gaussian Naive Bayes (GaussianNB).\n\nCan perform online updates to model parameters via :meth:`partial_fit`.\nFor details on algorithm used to update feature means and variance online,\nsee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\nhttp://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nRead more in the :ref:`User Guide `.", + "description": "Gaussian Naive Bayes (GaussianNB).\n\nCan perform online updates to model parameters via :meth:`partial_fit`.\nFor details on algorithm used to update feature means and variance online,\nsee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nRead more in the :ref:`User Guide `.", "docstring": "Gaussian Naive Bayes (GaussianNB).\n\nCan perform online updates to model parameters via :meth:`partial_fit`.\nFor details on algorithm used to update feature means and variance online,\nsee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\npriors : array-like of shape (n_classes,)\n Prior probabilities of the classes. If specified, the priors are not\n adjusted according to the data.\n\nvar_smoothing : float, default=1e-9\n Portion of the largest variance of all features that is added to\n variances for calculation stability.\n\n .. versionadded:: 0.20\n\nAttributes\n----------\nclass_count_ : ndarray of shape (n_classes,)\n number of training samples observed in each class.\n\nclass_prior_ : ndarray of shape (n_classes,)\n probability of each class.\n\nclasses_ : ndarray of shape (n_classes,)\n class labels known to the classifier.\n\nepsilon_ : float\n absolute additive value to variances.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nsigma_ : ndarray of shape (n_classes, n_features)\n Variance of each feature per class.\n\n .. deprecated:: 1.0\n `sigma_` is deprecated in 1.0 and will be removed in 1.2.\n Use `var_` instead.\n\nvar_ : ndarray of shape (n_classes, n_features)\n Variance of each feature per class.\n\n .. 
versionadded:: 1.0\n\ntheta_ : ndarray of shape (n_classes, n_features)\n mean of each feature per class.\n\nSee Also\n--------\nBernoulliNB : Naive Bayes classifier for multivariate Bernoulli models.\nCategoricalNB : Naive Bayes classifier for categorical features.\nComplementNB : Complement Naive Bayes classifier.\nMultinomialNB : Naive Bayes classifier for multinomial models.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n>>> Y = np.array([1, 1, 1, 2, 2, 2])\n>>> from sklearn.naive_bayes import GaussianNB\n>>> clf = GaussianNB()\n>>> clf.fit(X, Y)\nGaussianNB()\n>>> print(clf.predict([[-0.8, -1]]))\n[1]\n>>> clf_pf = GaussianNB()\n>>> clf_pf.partial_fit(X, Y, np.unique(Y))\nGaussianNB()\n>>> print(clf_pf.predict([[-0.8, -1]]))\n[1]" }, { @@ -27213,7 +27213,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.preprocessing"], - "description": "Center an arbitrary kernel matrix :math:`K`.\n\nLet define a kernel :math:`K` such that:\n\n.. math::\nK(X, Y) = \\phi(X) . \\phi(Y)^{T}\n\n:math:`\\phi(X)` is a function mapping of rows of :math:`X` to a\nHilbert space and :math:`K` is of shape `(n_samples, n_samples)`.\n\nThis class allows to compute :math:`\\tilde{K}(X, Y)` such that:\n\n.. math::\n\\tilde{K(X, Y)} = \\tilde{\\phi}(X) . \\tilde{\\phi}(Y)^{T}\n\n:math:`\\tilde{\\phi}(X)` is the centered mapped data in the Hilbert\nspace.\n\n`KernelCenterer` centers the features without explicitly computing the\nmapping :math:`\\phi(\\cdot)`. Working with centered kernels is sometime\nexpected when dealing with algebra computation such as eigendecomposition\nfor :class:`~sklearn.decomposition.KernelPCA` for instance.\n\nRead more in the :ref:`User Guide `.", + "description": "Center an arbitrary kernel matrix :math:`K`.\n\nLet define a kernel :math:`K` such that:\n\n.. math::\n K(X, Y) = \\phi(X) . \\phi(Y)^{T}\n\n:math:`\\phi(X)` is a function mapping of rows of :math:`X` to a\nHilbert space and :math:`K` is of shape `(n_samples, n_samples)`.\n\nThis class allows to compute :math:`\\tilde{K}(X, Y)` such that:\n\n.. math::\n \\tilde{K(X, Y)} = \\tilde{\\phi}(X) . \\tilde{\\phi}(Y)^{T}\n\n:math:`\\tilde{\\phi}(X)` is the centered mapped data in the Hilbert\nspace.\n\n`KernelCenterer` centers the features without explicitly computing the\nmapping :math:`\\phi(\\cdot)`. Working with centered kernels is sometime\nexpected when dealing with algebra computation such as eigendecomposition\nfor :class:`~sklearn.decomposition.KernelPCA` for instance.\n\nRead more in the :ref:`User Guide `.", "docstring": "Center an arbitrary kernel matrix :math:`K`.\n\nLet define a kernel :math:`K` such that:\n\n.. math::\n K(X, Y) = \\phi(X) . \\phi(Y)^{T}\n\n:math:`\\phi(X)` is a function mapping of rows of :math:`X` to a\nHilbert space and :math:`K` is of shape `(n_samples, n_samples)`.\n\nThis class allows to compute :math:`\\tilde{K}(X, Y)` such that:\n\n.. math::\n \\tilde{K(X, Y)} = \\tilde{\\phi}(X) . \\tilde{\\phi}(Y)^{T}\n\n:math:`\\tilde{\\phi}(X)` is the centered mapped data in the Hilbert\nspace.\n\n`KernelCenterer` centers the features without explicitly computing the\nmapping :math:`\\phi(\\cdot)`. 
Working with centered kernels is sometime\nexpected when dealing with algebra computation such as eigendecomposition\nfor :class:`~sklearn.decomposition.KernelPCA` for instance.\n\nRead more in the :ref:`User Guide `.\n\nAttributes\n----------\nK_fit_rows_ : ndarray of shape (n_samples,)\n Average of each column of kernel matrix.\n\nK_fit_all_ : float\n Average of kernel matrix.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nsklearn.kernel_approximation.Nystroem : Approximate a kernel map\n using a subset of the training data.\n\nReferences\n----------\n.. [1] `Sch\u00f6lkopf, Bernhard, Alexander Smola, and Klaus-Robert M\u00fcller.\n \"Nonlinear component analysis as a kernel eigenvalue problem.\"\n Neural computation 10.5 (1998): 1299-1319.\n `_\n\nExamples\n--------\n>>> from sklearn.preprocessing import KernelCenterer\n>>> from sklearn.metrics.pairwise import pairwise_kernels\n>>> X = [[ 1., -2., 2.],\n... [ -2., 1., 3.],\n... [ 4., 1., -2.]]\n>>> K = pairwise_kernels(X, metric='linear')\n>>> K\narray([[ 9., 2., -2.],\n [ 2., 14., -13.],\n [ -2., -13., 21.]])\n>>> transformer = KernelCenterer().fit(K)\n>>> transformer\nKernelCenterer()\n>>> transformer.transform(K)\narray([[ 5., 0., -5.],\n [ 0., 14., -14.],\n [ -5., -14., 19.]])" }, { @@ -27253,7 +27253,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.preprocessing"], - "description": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, e.g. between\nzero and one.\n\nThe transformation is given by::\n\nX_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\nX_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.", + "description": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, e.g. between\nzero and one.\n\nThe transformation is given by::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.", "docstring": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, e.g. 
between\nzero and one.\n\nThe transformation is given by::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nfeature_range : tuple (min, max), default=(0, 1)\n Desired range of transformed data.\n\ncopy : bool, default=True\n Set to False to perform inplace row normalization and avoid a\n copy (if the input is already a numpy array).\n\nclip : bool, default=False\n Set to True to clip transformed values of held-out data to\n provided `feature range`.\n\n .. versionadded:: 0.24\n\nAttributes\n----------\nmin_ : ndarray of shape (n_features,)\n Per feature adjustment for minimum. Equivalent to\n ``min - X.min(axis=0) * self.scale_``\n\nscale_ : ndarray of shape (n_features,)\n Per feature relative scaling of the data. Equivalent to\n ``(max - min) / (X.max(axis=0) - X.min(axis=0))``\n\n .. versionadded:: 0.17\n *scale_* attribute.\n\ndata_min_ : ndarray of shape (n_features,)\n Per feature minimum seen in the data\n\n .. versionadded:: 0.17\n *data_min_*\n\ndata_max_ : ndarray of shape (n_features,)\n Per feature maximum seen in the data\n\n .. versionadded:: 0.17\n *data_max_*\n\ndata_range_ : ndarray of shape (n_features,)\n Per feature range ``(data_max_ - data_min_)`` seen in the data\n\n .. versionadded:: 0.17\n *data_range_*\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nn_samples_seen_ : int\n The number of samples processed by the estimator.\n It will be reset on new calls to fit, but increments across\n ``partial_fit`` calls.\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nminmax_scale : Equivalent function without the estimator API.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\nExamples\n--------\n>>> from sklearn.preprocessing import MinMaxScaler\n>>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]\n>>> scaler = MinMaxScaler()\n>>> print(scaler.fit(data))\nMinMaxScaler()\n>>> print(scaler.data_max_)\n[ 1. 18.]\n>>> print(scaler.transform(data))\n[[0. 0. ]\n [0.25 0.25]\n [0.5 0.5 ]\n [1. 1. ]]\n>>> print(scaler.transform([[2, 2]]))\n[[1.5 0. ]]" }, { @@ -27357,7 +27357,7 @@ ], "is_public": true, "reexported_by": ["sklearn/sklearn.preprocessing"], - "description": "Standardize features by removing the mean and scaling to unit variance.\n\nThe standard score of a sample `x` is calculated as:\n\nz = (x - u) / s\n\nwhere `u` is the mean of the training samples or zero if `with_mean=False`,\nand `s` is the standard deviation of the training samples or one if\n`with_std=False`.\n\nCentering and scaling happen independently on each feature by computing\nthe relevant statistics on the samples in the training set. Mean and\nstandard deviation are then stored to be used on later data using\n:meth:`transform`.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators: they might behave badly if the\nindividual features do not more or less look like standard normally\ndistributed data (e.g. 
Gaussian with 0 mean and unit variance).\n\nFor instance many elements used in the objective function of\na learning algorithm (such as the RBF kernel of Support Vector\nMachines or the L1 and L2 regularizers of linear models) assume that\nall features are centered around 0 and have variance in the same\norder. If a feature has a variance that is orders of magnitude larger\nthan others, it might dominate the objective function and make the\nestimator unable to learn from other features correctly as expected.\n\nThis scaler can also be applied to sparse CSR or CSC matrices by passing\n`with_mean=False` to avoid breaking the sparsity structure of the data.\n\nRead more in the :ref:`User Guide `.", + "description": "Standardize features by removing the mean and scaling to unit variance.\n\nThe standard score of a sample `x` is calculated as:\n\n z = (x - u) / s\n\nwhere `u` is the mean of the training samples or zero if `with_mean=False`,\nand `s` is the standard deviation of the training samples or one if\n`with_std=False`.\n\nCentering and scaling happen independently on each feature by computing\nthe relevant statistics on the samples in the training set. Mean and\nstandard deviation are then stored to be used on later data using\n:meth:`transform`.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators: they might behave badly if the\nindividual features do not more or less look like standard normally\ndistributed data (e.g. Gaussian with 0 mean and unit variance).\n\nFor instance many elements used in the objective function of\na learning algorithm (such as the RBF kernel of Support Vector\nMachines or the L1 and L2 regularizers of linear models) assume that\nall features are centered around 0 and have variance in the same\norder. If a feature has a variance that is orders of magnitude larger\nthan others, it might dominate the objective function and make the\nestimator unable to learn from other features correctly as expected.\n\nThis scaler can also be applied to sparse CSR or CSC matrices by passing\n`with_mean=False` to avoid breaking the sparsity structure of the data.\n\nRead more in the :ref:`User Guide `.", "docstring": "Standardize features by removing the mean and scaling to unit variance.\n\nThe standard score of a sample `x` is calculated as:\n\n z = (x - u) / s\n\nwhere `u` is the mean of the training samples or zero if `with_mean=False`,\nand `s` is the standard deviation of the training samples or one if\n`with_std=False`.\n\nCentering and scaling happen independently on each feature by computing\nthe relevant statistics on the samples in the training set. Mean and\nstandard deviation are then stored to be used on later data using\n:meth:`transform`.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators: they might behave badly if the\nindividual features do not more or less look like standard normally\ndistributed data (e.g. Gaussian with 0 mean and unit variance).\n\nFor instance many elements used in the objective function of\na learning algorithm (such as the RBF kernel of Support Vector\nMachines or the L1 and L2 regularizers of linear models) assume that\nall features are centered around 0 and have variance in the same\norder. 
If a feature has a variance that is orders of magnitude larger\nthan others, it might dominate the objective function and make the\nestimator unable to learn from other features correctly as expected.\n\nThis scaler can also be applied to sparse CSR or CSC matrices by passing\n`with_mean=False` to avoid breaking the sparsity structure of the data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncopy : bool, default=True\n If False, try to avoid a copy and do inplace scaling instead.\n This is not guaranteed to always work inplace; e.g. if the data is\n not a NumPy array or scipy.sparse CSR matrix, a copy may still be\n returned.\n\nwith_mean : bool, default=True\n If True, center the data before scaling.\n This does not work (and will raise an exception) when attempted on\n sparse matrices, because centering them entails building a dense\n matrix which in common use cases is likely to be too large to fit in\n memory.\n\nwith_std : bool, default=True\n If True, scale the data to unit variance (or equivalently,\n unit standard deviation).\n\nAttributes\n----------\nscale_ : ndarray of shape (n_features,) or None\n Per feature relative scaling of the data to achieve zero mean and unit\n variance. Generally this is calculated using `np.sqrt(var_)`. If a\n variance is zero, we can't achieve unit variance, and the data is left\n as-is, giving a scaling factor of 1. `scale_` is equal to `None`\n when `with_std=False`.\n\n .. versionadded:: 0.17\n *scale_*\n\nmean_ : ndarray of shape (n_features,) or None\n The mean value for each feature in the training set.\n Equal to ``None`` when ``with_mean=False``.\n\nvar_ : ndarray of shape (n_features,) or None\n The variance for each feature in the training set. Used to compute\n `scale_`. Equal to ``None`` when ``with_std=False``.\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nn_samples_seen_ : int or ndarray of shape (n_features,)\n The number of samples processed by the estimator for each feature.\n If there are no missing samples, the ``n_samples_seen`` will be an\n integer, otherwise it will be an array of dtype int. If\n `sample_weights` are used it will be a float (if no missing data)\n or an array of dtype float that sums the weights seen so far.\n Will be reset on new calls to fit, but increments across\n ``partial_fit`` calls.\n\nSee Also\n--------\nscale : Equivalent function without the estimator API.\n\n:class:`~sklearn.decomposition.PCA` : Further removes the linear\n correlation across features with 'whiten=True'.\n\nNotes\n-----\nNaNs are treated as missing values: disregarded in fit, and maintained in\ntransform.\n\nWe use a biased estimator for the standard deviation, equivalent to\n`numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to\naffect model performance.\n\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`.\n\nExamples\n--------\n>>> from sklearn.preprocessing import StandardScaler\n>>> data = [[0, 0], [0, 0], [1, 1], [1, 1]]\n>>> scaler = StandardScaler()\n>>> print(scaler.fit(data))\nStandardScaler()\n>>> print(scaler.mean_)\n[0.5 0.5]\n>>> print(scaler.transform(data))\n[[-1. -1.]\n [-1. -1.]\n [ 1. 1.]\n [ 1. 1.]]\n>>> print(scaler.transform([[2, 2]]))\n[[3. 
3.]]" }, { @@ -27615,7 +27615,7 @@ ], "is_public": true, "reexported_by": [], - "description": "Reduce dimensionality through sparse random projection.\n\nSparse random matrix is an alternative to dense random\nprojection matrix that guarantees similar embedding quality while being\nmuch more memory efficient and allowing faster computation of the\nprojected data.\n\nIf we note `s = 1 / density` the components of the random matrix are\ndrawn from:\n\n- -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n- 0 with probability 1 - 1 / s\n- +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", + "description": "Reduce dimensionality through sparse random projection.\n\nSparse random matrix is an alternative to dense random\nprojection matrix that guarantees similar embedding quality while being\nmuch more memory efficient and allowing faster computation of the\nprojected data.\n\nIf we note `s = 1 / density` the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", "docstring": "Reduce dimensionality through sparse random projection.\n\nSparse random matrix is an alternative to dense random\nprojection matrix that guarantees similar embedding quality while being\nmuch more memory efficient and allowing faster computation of the\nprojected data.\n\nIf we note `s = 1 / density` the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13\n\nParameters\n----------\nn_components : int or 'auto', default='auto'\n Dimensionality of the target projection space.\n\n n_components can be automatically adjusted according to the\n number of samples in the dataset and the bound given by the\n Johnson-Lindenstrauss lemma. In that case the quality of the\n embedding is controlled by the ``eps`` parameter.\n\n It should be noted that Johnson-Lindenstrauss lemma can yield\n very conservative estimated of the required number of components\n as it makes no assumption on the structure of the dataset.\n\ndensity : float or 'auto', default='auto'\n Ratio in the range (0, 1] of non-zero component in the random\n projection matrix.\n\n If density = 'auto', the value is set to the minimum density\n as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n Use density = 1 / 3.0 if you want to reproduce the results from\n Achlioptas, 2001.\n\neps : float, default=0.1\n Parameter to control the quality of the embedding according to\n the Johnson-Lindenstrauss lemma when n_components is set to\n 'auto'. This value should be strictly positive.\n\n Smaller values lead to better embedding and higher number of\n dimensions (n_components) in the target projection space.\n\ndense_output : bool, default=False\n If True, ensure that the output of the random projection is a\n dense numpy array even if the input and random projection matrix\n are both sparse. 
In practice, if the number of components is\n small the number of zero components in the projected data will\n be very small and it will be more CPU and memory efficient to\n use a dense representation.\n\n If False, the projected data uses a sparse representation if\n the input is sparse.\n\ncompute_inverse_components : bool, default=False\n Learn the inverse transform by computing the pseudo-inverse of the\n components during fit. Note that the pseudo-inverse is always a dense\n array, even if the training data was sparse. This means that it might be\n necessary to call `inverse_transform` on a small batch of samples at a\n time to avoid exhausting the available memory on the host. Moreover,\n computing the pseudo-inverse does not scale well to large matrices.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the\n projection matrix at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nAttributes\n----------\nn_components_ : int\n Concrete number of components computed when n_components=\"auto\".\n\ncomponents_ : sparse matrix of shape (n_components, n_features)\n Random matrix used for the projection. Sparse matrix will be of CSR\n format.\n\ninverse_components_ : ndarray of shape (n_features, n_components)\n Pseudo-inverse of the components, only computed if\n `compute_inverse_components` is True.\n\n .. versionadded:: 1.1\n\ndensity_ : float in range 0.0 - 1.0\n Concrete density computed from when density = \"auto\".\n\nn_features_in_ : int\n Number of features seen during :term:`fit`.\n\n .. versionadded:: 0.24\n\nfeature_names_in_ : ndarray of shape (`n_features_in_`,)\n Names of features seen during :term:`fit`. Defined only when `X`\n has feature names that are all strings.\n\n .. versionadded:: 1.0\n\nSee Also\n--------\nGaussianRandomProjection : Reduce dimensionality through Gaussian\n random projection.\n\nReferences\n----------\n\n.. [1] Ping Li, T. Hastie and K. W. Church, 2006,\n \"Very Sparse Random Projections\".\n https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n.. [2] D. Achlioptas, 2001, \"Database-friendly random projections\",\n https://users.soe.ucsc.edu/~optas/papers/jl.pdf\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.random_projection import SparseRandomProjection\n>>> rng = np.random.RandomState(42)\n>>> X = rng.rand(25, 3000)\n>>> transformer = SparseRandomProjection(random_state=rng)\n>>> X_new = transformer.fit_transform(X)\n>>> X_new.shape\n(25, 2759)\n>>> # very few components are non-zero\n>>> np.mean(transformer.components_ != 0)\n0.0182..." }, { @@ -28230,7 +28230,7 @@ ], "is_public": false, "reexported_by": [], - "description": "Pretty Printer class for estimator objects.\n\nThis extends the pprint.PrettyPrinter class, because:\n- we need estimators to be printed with their parameters, e.g.\nEstimator(param1=value1, ...) 
which is not supported by default.\n- the 'compact' parameter of PrettyPrinter is ignored for dicts, which\nmay lead to very long representations that we want to avoid.\n\nQuick overview of pprint.PrettyPrinter (see also\nhttps://stackoverflow.com/questions/49565047/pprint-with-hex-numbers):\n\n- the entry point is the _format() method which calls format() (overridden\nhere)\n- format() directly calls _safe_repr() for a first try at rendering the\nobject\n- _safe_repr formats the whole object recursively, only calling itself,\nnot caring about line length or anything\n- back to _format(), if the output string is too long, _format() then calls\nthe appropriate _pprint_TYPE() method (e.g. _pprint_list()) depending on\nthe type of the object. This where the line length and the compact\nparameters are taken into account.\n- those _pprint_TYPE() methods will internally use the format() method for\nrendering the nested objects of an object (e.g. the elements of a list)\n\nIn the end, everything has to be implemented twice: in _safe_repr and in\nthe custom _pprint_TYPE methods. Unfortunately PrettyPrinter is really not\nstraightforward to extend (especially when we want a compact output), so\nthe code is a bit convoluted.\n\nThis class overrides:\n- format() to support the changed_only parameter\n- _safe_repr to support printing of estimators (for when they fit on a\nsingle line)\n- _format_dict_items so that dict are correctly 'compacted'\n- _format_items so that ellipsis is used on long lists and tuples\n\nWhen estimators cannot be printed on a single line, the builtin _format()\nwill call _pprint_estimator() because it was registered to do so (see\n_dispatch[BaseEstimator.__repr__] = _pprint_estimator).\n\nboth _format_dict_items() and _pprint_estimator() use the\n_format_params_or_dict_items() method that will format parameters and\nkey-value pairs respecting the compact parameter. This method needs another\nsubroutine _pprint_key_val_tuple() used when a parameter or a key-value\npair is too long to fit on a single line. This subroutine is called in\n_format() and is registered as well in the _dispatch dict (just like\n_pprint_estimator). We had to create the two classes KeyValTuple and\nKeyValTupleParam for this.", + "description": "Pretty Printer class for estimator objects.\n\nThis extends the pprint.PrettyPrinter class, because:\n- we need estimators to be printed with their parameters, e.g.\n Estimator(param1=value1, ...) which is not supported by default.\n- the 'compact' parameter of PrettyPrinter is ignored for dicts, which\n may lead to very long representations that we want to avoid.\n\nQuick overview of pprint.PrettyPrinter (see also\nhttps://stackoverflow.com/questions/49565047/pprint-with-hex-numbers):\n\n- the entry point is the _format() method which calls format() (overridden\n here)\n- format() directly calls _safe_repr() for a first try at rendering the\n object\n- _safe_repr formats the whole object recursively, only calling itself,\n not caring about line length or anything\n- back to _format(), if the output string is too long, _format() then calls\n the appropriate _pprint_TYPE() method (e.g. _pprint_list()) depending on\n the type of the object. This where the line length and the compact\n parameters are taken into account.\n- those _pprint_TYPE() methods will internally use the format() method for\n rendering the nested objects of an object (e.g. 
the elements of a list)\n\nIn the end, everything has to be implemented twice: in _safe_repr and in\nthe custom _pprint_TYPE methods. Unfortunately PrettyPrinter is really not\nstraightforward to extend (especially when we want a compact output), so\nthe code is a bit convoluted.\n\nThis class overrides:\n- format() to support the changed_only parameter\n- _safe_repr to support printing of estimators (for when they fit on a\n single line)\n- _format_dict_items so that dict are correctly 'compacted'\n- _format_items so that ellipsis is used on long lists and tuples\n\nWhen estimators cannot be printed on a single line, the builtin _format()\nwill call _pprint_estimator() because it was registered to do so (see\n_dispatch[BaseEstimator.__repr__] = _pprint_estimator).\n\nboth _format_dict_items() and _pprint_estimator() use the\n_format_params_or_dict_items() method that will format parameters and\nkey-value pairs respecting the compact parameter. This method needs another\nsubroutine _pprint_key_val_tuple() used when a parameter or a key-value\npair is too long to fit on a single line. This subroutine is called in\n_format() and is registered as well in the _dispatch dict (just like\n_pprint_estimator). We had to create the two classes KeyValTuple and\nKeyValTupleParam for this.", "docstring": "Pretty Printer class for estimator objects.\n\nThis extends the pprint.PrettyPrinter class, because:\n- we need estimators to be printed with their parameters, e.g.\n Estimator(param1=value1, ...) which is not supported by default.\n- the 'compact' parameter of PrettyPrinter is ignored for dicts, which\n may lead to very long representations that we want to avoid.\n\nQuick overview of pprint.PrettyPrinter (see also\nhttps://stackoverflow.com/questions/49565047/pprint-with-hex-numbers):\n\n- the entry point is the _format() method which calls format() (overridden\n here)\n- format() directly calls _safe_repr() for a first try at rendering the\n object\n- _safe_repr formats the whole object recursively, only calling itself,\n not caring about line length or anything\n- back to _format(), if the output string is too long, _format() then calls\n the appropriate _pprint_TYPE() method (e.g. _pprint_list()) depending on\n the type of the object. This where the line length and the compact\n parameters are taken into account.\n- those _pprint_TYPE() methods will internally use the format() method for\n rendering the nested objects of an object (e.g. the elements of a list)\n\nIn the end, everything has to be implemented twice: in _safe_repr and in\nthe custom _pprint_TYPE methods. Unfortunately PrettyPrinter is really not\nstraightforward to extend (especially when we want a compact output), so\nthe code is a bit convoluted.\n\nThis class overrides:\n- format() to support the changed_only parameter\n- _safe_repr to support printing of estimators (for when they fit on a\n single line)\n- _format_dict_items so that dict are correctly 'compacted'\n- _format_items so that ellipsis is used on long lists and tuples\n\nWhen estimators cannot be printed on a single line, the builtin _format()\nwill call _pprint_estimator() because it was registered to do so (see\n_dispatch[BaseEstimator.__repr__] = _pprint_estimator).\n\nboth _format_dict_items() and _pprint_estimator() use the\n_format_params_or_dict_items() method that will format parameters and\nkey-value pairs respecting the compact parameter. 
This method needs another\nsubroutine _pprint_key_val_tuple() used when a parameter or a key-value\npair is too long to fit on a single line. This subroutine is called in\n_format() and is registered as well in the _dispatch dict (just like\n_pprint_estimator). We had to create the two classes KeyValTuple and\nKeyValTupleParam for this." }, { @@ -28580,7 +28580,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Get a compiler equivalent to the one that will be used to build sklearn\n\nHandles compiler specified as follows:\n- python setup.py build_ext --compiler=\n- CC= python setup.py build_ext", + "description": "Get a compiler equivalent to the one that will be used to build sklearn\n\nHandles compiler specified as follows:\n - python setup.py build_ext --compiler=\n - CC= python setup.py build_ext", "docstring": "Get a compiler equivalent to the one that will be used to build sklearn\n\nHandles compiler specified as follows:\n - python setup.py build_ext --compiler=\n - CC= python setup.py build_ext" }, { @@ -28792,7 +28792,7 @@ "docstring": { "type": "bool", "default_value": "None", - "description": "If True, only the parameters that were set to non-default\nvalues will be printed when printing an estimator. For example,\n``print(SVC())`` while True will only print 'SVC()', but would print\n'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters\nwhen False. If None, the existing value won't change.\nThe default value is True.\n\n.. versionchanged:: 0.23\nDefault changed from False to True." + "description": "If True, only the parameters that were set to non-default\nvalues will be printed when printing an estimator. For example,\n``print(SVC())`` while True will only print 'SVC()', but would print\n'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters\nwhen False. If None, the existing value won't change.\nThe default value is True.\n\n.. versionchanged:: 0.23\n Default changed from False to True." }, "type": { "kind": "NamedType", @@ -29338,7 +29338,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Compute the derivative of the unit deviance w.r.t. y_pred.\n\nThe derivative of the unit deviance is given by\n:math:`\\frac{\\partial}{\\partialy_\\textrm{pred}}d(y,y_\\textrm{pred})\n= -2\\frac{y-y_\\textrm{pred}}{v(y_\\textrm{pred})}`\nwith unit variance :math:`v(y_\\textrm{pred})`.", + "description": "Compute the derivative of the unit deviance w.r.t. y_pred.\n\nThe derivative of the unit deviance is given by\n:math:`\\frac{\\partial}{\\partialy_\\textrm{pred}}d(y,y_\\textrm{pred})\n = -2\\frac{y-y_\\textrm{pred}}{v(y_\\textrm{pred})}`\nwith unit variance :math:`v(y_\\textrm{pred})`.", "docstring": "Compute the derivative of the unit deviance w.r.t. y_pred.\n\nThe derivative of the unit deviance is given by\n:math:`\\frac{\\partial}{\\partialy_\\textrm{pred}}d(y,y_\\textrm{pred})\n = -2\\frac{y-y_\\textrm{pred}}{v(y_\\textrm{pred})}`\nwith unit variance :math:`v(y_\\textrm{pred})`.\n\nParameters\n----------\ny : array of shape (n_samples,)\n Target values.\n\ny_pred : array of shape (n_samples,)\n Predicted mean." 
}, { @@ -29382,7 +29382,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Compute the unit variance function.\n\nThe unit variance :math:`v(y_\\textrm{pred})` determines the variance as\na function of the mean :math:`y_\\textrm{pred}` by\n:math:`\\mathrm{Var}[Y_i] = \\phi/s_i*v(y_\\textrm{pred}_i)`.\nIt can also be derived from the unit deviance\n:math:`d(y,y_\\textrm{pred})` as\n\n.. math:: v(y_\\textrm{pred}) = \\frac{2}{\n\\frac{\\partial^2 d(y,y_\\textrm{pred})}{\n\\partialy_\\textrm{pred}^2}}\\big|_{y=y_\\textrm{pred}}\n\nSee also :func:`variance`.", + "description": "Compute the unit variance function.\n\nThe unit variance :math:`v(y_\\textrm{pred})` determines the variance as\na function of the mean :math:`y_\\textrm{pred}` by\n:math:`\\mathrm{Var}[Y_i] = \\phi/s_i*v(y_\\textrm{pred}_i)`.\nIt can also be derived from the unit deviance\n:math:`d(y,y_\\textrm{pred})` as\n\n.. math:: v(y_\\textrm{pred}) = \\frac{2}{\n \\frac{\\partial^2 d(y,y_\\textrm{pred})}{\n \\partialy_\\textrm{pred}^2}}\\big|_{y=y_\\textrm{pred}}\n\nSee also :func:`variance`.", "docstring": "Compute the unit variance function.\n\nThe unit variance :math:`v(y_\\textrm{pred})` determines the variance as\na function of the mean :math:`y_\\textrm{pred}` by\n:math:`\\mathrm{Var}[Y_i] = \\phi/s_i*v(y_\\textrm{pred}_i)`.\nIt can also be derived from the unit deviance\n:math:`d(y,y_\\textrm{pred})` as\n\n.. math:: v(y_\\textrm{pred}) = \\frac{2}{\n \\frac{\\partial^2 d(y,y_\\textrm{pred})}{\n \\partialy_\\textrm{pred}^2}}\\big|_{y=y_\\textrm{pred}}\n\nSee also :func:`variance`.\n\nParameters\n----------\ny_pred : array of shape (n_samples,)\n Predicted mean." }, { @@ -30425,7 +30425,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Absolute error with identity link, for regression.\n\nDomain:\ny_true and y_pred all real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, the absolute error is defined as::\n\nloss(x_i) = |y_true_i - raw_prediction_i|", + "description": "Absolute error with identity link, for regression.\n\nDomain:\ny_true and y_pred all real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, the absolute error is defined as::\n\n loss(x_i) = |y_true_i - raw_prediction_i|", "docstring": "" }, { @@ -30665,7 +30665,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Base class for a loss function of 1-dimensional targets.\n\nConventions:\n\n- y_true.shape = sample_weight.shape = (n_samples,)\n- y_pred.shape = raw_prediction.shape = (n_samples,)\n- If is_multiclass is true (multiclass classification), then\ny_pred.shape = raw_prediction.shape = (n_samples, n_classes)\nNote that this corresponds to the return value of decision_function.\n\ny_true, y_pred, sample_weight and raw_prediction must either be all float64\nor all float32.\ngradient and hessian must be either both float64 or both float32.\n\nNote that y_pred = link.inverse(raw_prediction).\n\nSpecific loss classes can inherit specific link classes to satisfy\nBaseLink's abstractmethods.", + "description": "Base class for a loss function of 1-dimensional targets.\n\nConventions:\n\n - y_true.shape = sample_weight.shape = (n_samples,)\n - y_pred.shape = raw_prediction.shape = (n_samples,)\n - If is_multiclass is true (multiclass classification), then\n y_pred.shape = raw_prediction.shape = (n_samples, n_classes)\n Note that this corresponds to the return value of decision_function.\n\ny_true, y_pred, sample_weight and raw_prediction must 
either be all float64\nor all float32.\ngradient and hessian must be either both float64 or both float32.\n\nNote that y_pred = link.inverse(raw_prediction).\n\nSpecific loss classes can inherit specific link classes to satisfy\nBaseLink's abstractmethods.", "docstring": "" }, { @@ -31265,7 +31265,7 @@ }, "type": { "kind": "EnumType", - "values": ["C", "F"] + "values": ["F", "C"] } } ], @@ -31621,7 +31621,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Half Binomial deviance loss with logit link, for binary classification.\n\nThis is also know as binary cross entropy, log-loss and logistic loss.\n\nDomain:\ny_true in [0, 1], i.e. regression on the unit interval\ny_pred in (0, 1), i.e. boundaries excluded\n\nLink:\ny_pred = expit(raw_prediction)\n\nFor a given sample x_i, half Binomial deviance is defined as the negative\nlog-likelihood of the Binomial/Bernoulli distribution and can be expressed\nas::\n\nloss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i\n\nSee The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,\nsection 4.4.1 (about logistic regression).\n\nNote that the formulation works for classification, y = {0, 1}, as well as\nlogistic regression, y = [0, 1].\nIf you add `constant_to_optimal_zero` to the loss, you get half the\nBernoulli/binomial deviance.", + "description": "Half Binomial deviance loss with logit link, for binary classification.\n\nThis is also know as binary cross entropy, log-loss and logistic loss.\n\nDomain:\ny_true in [0, 1], i.e. regression on the unit interval\ny_pred in (0, 1), i.e. boundaries excluded\n\nLink:\ny_pred = expit(raw_prediction)\n\nFor a given sample x_i, half Binomial deviance is defined as the negative\nlog-likelihood of the Binomial/Bernoulli distribution and can be expressed\nas::\n\n loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i\n\nSee The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,\nsection 4.4.1 (about logistic regression).\n\nNote that the formulation works for classification, y = {0, 1}, as well as\nlogistic regression, y = [0, 1].\nIf you add `constant_to_optimal_zero` to the loss, you get half the\nBernoulli/binomial deviance.", "docstring": "" }, { @@ -31761,7 +31761,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Half Gamma deviance loss with log-link, for regression.\n\nDomain:\ny_true and y_pred in positive real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half Gamma deviance loss is defined as::\n\nloss(x_i) = log(exp(raw_prediction_i)/y_true_i)\n+ y_true/exp(raw_prediction_i) - 1\n\nHalf the Gamma deviance is actually proportional to the negative log-\nlikelihood up to constant terms (not involving raw_prediction) and\nsimplifies the computation of the gradients.\nWe also skip the constant term `-log(y_true_i) - 1`.", + "description": "Half Gamma deviance loss with log-link, for regression.\n\nDomain:\ny_true and y_pred in positive real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half Gamma deviance loss is defined as::\n\n loss(x_i) = log(exp(raw_prediction_i)/y_true_i)\n + y_true/exp(raw_prediction_i) - 1\n\nHalf the Gamma deviance is actually proportional to the negative log-\nlikelihood up to constant terms (not involving raw_prediction) and\nsimplifies the computation of the gradients.\nWe also skip the constant term `-log(y_true_i) - 1`.", "docstring": "" }, { @@ -31871,7 +31871,7 @@ "results": [], "is_public": false, "reexported_by": [], 
- "description": "Categorical cross-entropy loss, for multiclass classification.\n\nDomain:\ny_true in {0, 1, 2, 3, .., n_classes - 1}\ny_pred has n_classes elements, each element in (0, 1)\n\nLink:\ny_pred = softmax(raw_prediction)\n\nNote: We assume y_true to be already label encoded. The inverse link is\nsoftmax. But the full link function is the symmetric multinomial logit\nfunction.\n\nFor a given sample x_i, the categorical cross-entropy loss is defined as\nthe negative log-likelihood of the multinomial distribution, it\ngeneralizes the binary cross-entropy to more than 2 classes::\n\nloss_i = log(sum(exp(raw_pred_{i, k}), k=0..n_classes-1))\n- sum(y_true_{i, k} * raw_pred_{i, k}, k=0..n_classes-1)\n\nSee [1].\n\nNote that for the hessian, we calculate only the diagonal part in the\nclasses: If the full hessian for classes k and l and sample i is H_i_k_l,\nwe calculate H_i_k_k, i.e. k=l.", + "description": "Categorical cross-entropy loss, for multiclass classification.\n\nDomain:\ny_true in {0, 1, 2, 3, .., n_classes - 1}\ny_pred has n_classes elements, each element in (0, 1)\n\nLink:\ny_pred = softmax(raw_prediction)\n\nNote: We assume y_true to be already label encoded. The inverse link is\nsoftmax. But the full link function is the symmetric multinomial logit\nfunction.\n\nFor a given sample x_i, the categorical cross-entropy loss is defined as\nthe negative log-likelihood of the multinomial distribution, it\ngeneralizes the binary cross-entropy to more than 2 classes::\n\n loss_i = log(sum(exp(raw_pred_{i, k}), k=0..n_classes-1))\n - sum(y_true_{i, k} * raw_pred_{i, k}, k=0..n_classes-1)\n\nSee [1].\n\nNote that for the hessian, we calculate only the diagonal part in the\nclasses: If the full hessian for classes k and l and sample i is H_i_k_l,\nwe calculate H_i_k_k, i.e. 
k=l.", "docstring": "" }, { @@ -32211,7 +32211,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Half Poisson deviance loss with log-link, for regression.\n\nDomain:\ny_true in non-negative real numbers\ny_pred in positive real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half the Poisson deviance is defined as::\n\nloss(x_i) = y_true_i * log(y_true_i/exp(raw_prediction_i))\n- y_true_i + exp(raw_prediction_i)\n\nHalf the Poisson deviance is actually the negative log-likelihood up to\nconstant terms (not involving raw_prediction) and simplifies the\ncomputation of the gradients.\nWe also skip the constant term `y_true_i * log(y_true_i) - y_true_i`.", + "description": "Half Poisson deviance loss with log-link, for regression.\n\nDomain:\ny_true in non-negative real numbers\ny_pred in positive real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half the Poisson deviance is defined as::\n\n loss(x_i) = y_true_i * log(y_true_i/exp(raw_prediction_i))\n - y_true_i + exp(raw_prediction_i)\n\nHalf the Poisson deviance is actually the negative log-likelihood up to\nconstant terms (not involving raw_prediction) and simplifies the\ncomputation of the gradients.\nWe also skip the constant term `y_true_i * log(y_true_i) - y_true_i`.", "docstring": "" }, { @@ -32307,7 +32307,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Half squared error with identity link, for regression.\n\nDomain:\ny_true and y_pred all real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, half squared error is defined as::\n\nloss(x_i) = 0.5 * (y_true_i - raw_prediction_i)**2\n\nThe factor of 0.5 simplifies the computation of gradients and results in a\nunit hessian (and is consistent with what is done in LightGBM). It is also\nhalf the Normal distribution deviance.", + "description": "Half squared error with identity link, for regression.\n\nDomain:\ny_true and y_pred all real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, half squared error is defined as::\n\n loss(x_i) = 0.5 * (y_true_i - raw_prediction_i)**2\n\nThe factor of 0.5 simplifies the computation of gradients and results in a\nunit hessian (and is consistent with what is done in LightGBM). 
It is also\nhalf the Normal distribution deviance.", "docstring": "" }, { @@ -32362,7 +32362,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Half Tweedie deviance loss with log-link, for regression.\n\nDomain:\ny_true in real numbers for power <= 0\ny_true in non-negative real numbers for 0 < power < 2\ny_true in positive real numbers for 2 <= power\ny_pred in positive real numbers\npower in real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half Tweedie deviance loss with p=power is defined\nas::\n\nloss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n- y_true_i * exp(raw_prediction_i)**(1-p) / (1-p)\n+ exp(raw_prediction_i)**(2-p) / (2-p)\n\nTaking the limits for p=0, 1, 2 gives HalfSquaredError with a log link,\nHalfPoissonLoss and HalfGammaLoss.\n\nWe also skip constant terms, but those are different for p=0, 1, 2.\nTherefore, the loss is not continuous in `power`.\n\nNote furthermore that although no Tweedie distribution exists for\n0 < power < 1, it still gives a strictly consistent scoring function for\nthe expectation.", + "description": "Half Tweedie deviance loss with log-link, for regression.\n\nDomain:\ny_true in real numbers for power <= 0\ny_true in non-negative real numbers for 0 < power < 2\ny_true in positive real numbers for 2 <= power\ny_pred in positive real numbers\npower in real numbers\n\nLink:\ny_pred = exp(raw_prediction)\n\nFor a given sample x_i, half Tweedie deviance loss with p=power is defined\nas::\n\n loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n - y_true_i * exp(raw_prediction_i)**(1-p) / (1-p)\n + exp(raw_prediction_i)**(2-p) / (2-p)\n\nTaking the limits for p=0, 1, 2 gives HalfSquaredError with a log link,\nHalfPoissonLoss and HalfGammaLoss.\n\nWe also skip constant terms, but those are different for p=0, 1, 2.\nTherefore, the loss is not continuous in `power`.\n\nNote furthermore that although no Tweedie distribution exists for\n0 < power < 1, it still gives a strictly consistent scoring function for\nthe expectation.", "docstring": "" }, { @@ -32472,7 +32472,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Half Tweedie deviance loss with identity link, for regression.\n\nDomain:\ny_true in real numbers for power <= 0\ny_true in non-negative real numbers for 0 < power < 2\ny_true in positive real numbers for 2 <= power\ny_pred in positive real numbers for power != 0\ny_pred in real numbers for power = 0\npower in real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, half Tweedie deviance loss with p=power is defined\nas::\n\nloss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n- y_true_i * raw_prediction_i**(1-p) / (1-p)\n+ raw_prediction_i**(2-p) / (2-p)\n\nNote that the minimum value of this loss is 0.\n\nNote furthermore that although no Tweedie distribution exists for\n0 < power < 1, it still gives a strictly consistent scoring function for\nthe expectation.", + "description": "Half Tweedie deviance loss with identity link, for regression.\n\nDomain:\ny_true in real numbers for power <= 0\ny_true in non-negative real numbers for 0 < power < 2\ny_true in positive real numbers for 2 <= power\ny_pred in positive real numbers for power != 0\ny_pred in real numbers for power = 0\npower in real numbers\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, half Tweedie deviance loss with p=power is defined\nas::\n\n loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)\n - y_true_i * raw_prediction_i**(1-p) / (1-p)\n + 
raw_prediction_i**(2-p) / (2-p)\n\nNote that the minimum value of this loss is 0.\n\nNote furthermore that although no Tweedie distribution exists for\n0 < power < 1, it still gives a strictly consistent scoring function for\nthe expectation.", "docstring": "" }, { @@ -32527,7 +32527,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Quantile loss aka pinball loss, for regression.\n\nDomain:\ny_true and y_pred all real numbers\nquantile in (0, 1)\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, the pinball loss is defined as::\n\nloss(x_i) = rho_{quantile}(y_true_i - raw_prediction_i)\n\nrho_{quantile}(u) = u * (quantile - 1_{u<0})\n= -u *(1 - quantile) if u < 0\nu * quantile if u >= 0\n\nNote: 2 * PinballLoss(quantile=0.5) equals AbsoluteError().", + "description": "Quantile loss aka pinball loss, for regression.\n\nDomain:\ny_true and y_pred all real numbers\nquantile in (0, 1)\n\nLink:\ny_pred = raw_prediction\n\nFor a given sample x_i, the pinball loss is defined as::\n\n loss(x_i) = rho_{quantile}(y_true_i - raw_prediction_i)\n\n rho_{quantile}(u) = u * (quantile - 1_{u<0})\n = -u *(1 - quantile) if u < 0\n u * quantile if u >= 0\n\nNote: 2 * PinballLoss(quantile=0.5) equals AbsoluteError().", "docstring": "" }, { @@ -32791,7 +32791,7 @@ "docstring": { "type": "bool", "default_value": "", - "description": "Whether to reset the `feature_names_in_` attribute.\nIf False, the input will be checked for consistency with\nfeature names of data provided when reset was last True.\n.. note::\nIt is recommended to call `reset=True` in `fit` and in the first\ncall to `partial_fit`. All other methods that validate `X`\nshould set `reset=False`." + "description": "Whether to reset the `feature_names_in_` attribute.\nIf False, the input will be checked for consistency with\nfeature names of data provided when reset was last True.\n.. note::\n It is recommended to call `reset=True` in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`." }, "type": { "kind": "NamedType", @@ -32861,7 +32861,7 @@ "docstring": { "type": "bool", "default_value": "", - "description": "If True, the `n_features_in_` attribute is set to `X.shape[1]`.\nIf False and the attribute exists, then check that it is equal to\n`X.shape[1]`. If False and the attribute does *not* exist, then\nthe check is skipped.\n.. note::\nIt is recommended to call reset=True in `fit` and in the first\ncall to `partial_fit`. All other methods that validate `X`\nshould set `reset=False`." + "description": "If True, the `n_features_in_` attribute is set to `X.shape[1]`.\nIf False and the attribute exists, then check that it is equal to\n`X.shape[1]`. If False and the attribute does *not* exist, then\nthe check is skipped.\n.. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`." }, "type": { "kind": "NamedType", @@ -33107,7 +33107,7 @@ "docstring": { "type": "array-like of shape (n_samples,)", "default_value": "'no_validation'", - "description": "The targets.\n\n- If `None`, `check_array` is called on `X`. If the estimator's\nrequires_y tag is True, then an error will be raised.\n- If `'no_validation'`, `check_array` is called on `X` and the\nestimator's requires_y tag is ignored. This is a default\nplaceholder and is never meant to be explicitly set. 
In that case\n`X` must be passed.\n- Otherwise, only `y` with `_check_y` or both `X` and `y` are\nchecked with either `check_array` or `check_X_y` depending on\n`validate_separately`." + "description": "The targets.\n\n- If `None`, `check_array` is called on `X`. If the estimator's\n requires_y tag is True, then an error will be raised.\n- If `'no_validation'`, `check_array` is called on `X` and the\n estimator's requires_y tag is ignored. This is a default\n placeholder and is never meant to be explicitly set. In that case\n `X` must be passed.\n- Otherwise, only `y` with `_check_y` or both `X` and `y` are\n checked with either `check_array` or `check_X_y` depending on\n `validate_separately`." }, "type": { "kind": "NamedType", @@ -33124,7 +33124,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "Whether to reset the `n_features_in_` attribute.\nIf False, the input will be checked for consistency with data\nprovided when reset was last True.\n.. note::\nIt is recommended to call reset=True in `fit` and in the first\ncall to `partial_fit`. All other methods that validate `X`\nshould set `reset=False`." + "description": "Whether to reset the `n_features_in_` attribute.\nIf False, the input will be checked for consistency with data\nprovided when reset was last True.\n.. note::\n It is recommended to call reset=True in `fit` and in the first\n call to `partial_fit`. All other methods that validate `X`\n should set `reset=False`." }, "type": { "kind": "NamedType", @@ -34062,7 +34062,7 @@ "docstring": { "type": "array-like of str or None", "default_value": "None", - "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\nused as feature names in. If `feature_names_in_` is not defined,\nthen the following input feature names are generated:\n`[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\nmatch `feature_names_in_` if `feature_names_in_` is defined." + "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\n used as feature names in. If `feature_names_in_` is not defined,\n then the following input feature names are generated:\n `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\n match `feature_names_in_` if `feature_names_in_` is defined." }, "type": { "kind": "UnionType", @@ -34377,7 +34377,7 @@ }, "type": { "kind": "EnumType", - "values": ["sigmoid", "isotonic"] + "values": ["isotonic", "sigmoid"] } }, { @@ -34390,7 +34390,7 @@ "docstring": { "type": "int, cross-validation generator, iterable or \"prefit\"", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if ``y`` is binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is\nneither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\nis used.\n\nRefer to the :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\nIf \"prefit\" is passed, it is assumed that `base_estimator` has been\nfitted already and all data is used for calibration.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." 
+ "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if ``y`` is binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is\nneither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\nis used.\n\nRefer to the :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\nIf \"prefit\" is passed, it is assumed that `base_estimator` has been\nfitted already and all data is used for calibration.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -34893,7 +34893,7 @@ "docstring": { "type": "{'uniform', 'quantile'}", "default_value": "'uniform'", - "description": "Strategy used to define the widths of the bins.\n\n- `'uniform'`: The bins have identical widths.\n- `'quantile'`: The bins have the same number of samples and depend\non predicted probabilities." + "description": "Strategy used to define the widths of the bins.\n\n- `'uniform'`: The bins have identical widths.\n- `'quantile'`: The bins have the same number of samples and depend\n on predicted probabilities." }, "type": { "kind": "EnumType", @@ -35082,7 +35082,7 @@ "docstring": { "type": "{'uniform', 'quantile'}", "default_value": "'uniform'", - "description": "Strategy used to define the widths of the bins.\n\n- `'uniform'`: The bins have identical widths.\n- `'quantile'`: The bins have the same number of samples and depend\non predicted probabilities." + "description": "Strategy used to define the widths of the bins.\n\n- `'uniform'`: The bins have identical widths.\n- `'quantile'`: The bins have the same number of samples and depend\n on predicted probabilities." }, "type": { "kind": "EnumType", @@ -35370,7 +35370,7 @@ }, "type": { "kind": "EnumType", - "values": ["sigmoid", "isotonic"] + "values": ["isotonic", "sigmoid"] } } ], @@ -35748,7 +35748,7 @@ }, "type": { "kind": "EnumType", - "values": ["sigmoid", "isotonic"] + "values": ["isotonic", "sigmoid"] } }, { @@ -35942,7 +35942,7 @@ }, "type": { "kind": "EnumType", - "values": ["sigmoid", "isotonic"] + "values": ["isotonic", "sigmoid"] } }, { @@ -36182,7 +36182,7 @@ "docstring": { "type": "bool", "default_value": "\"deprecated\"", - "description": "Whether y_prob needs to be normalized into the [0, 1] interval, i.e.\nis not a proper probability. If True, the smallest value in y_prob\nis linearly mapped onto 0 and the largest one onto 1.\n\n.. deprecated:: 1.1\nThe normalize argument is deprecated in v1.1 and will be removed in v1.3.\nExplicitly normalizing `y_prob` will reproduce this behavior, but it is\nrecommended that a proper probability is used (i.e. a classifier's\n`predict_proba` positive class)." + "description": "Whether y_prob needs to be normalized into the [0, 1] interval, i.e.\nis not a proper probability. If True, the smallest value in y_prob\nis linearly mapped onto 0 and the largest one onto 1.\n\n.. deprecated:: 1.1\n The normalize argument is deprecated in v1.1 and will be removed in v1.3.\n Explicitly normalizing `y_prob` will reproduce this behavior, but it is\n recommended that a proper probability is used (i.e. a classifier's\n `predict_proba` positive class)." 
}, "type": { "kind": "NamedType", @@ -36216,7 +36216,7 @@ "docstring": { "type": "{'uniform', 'quantile'}", "default_value": "'uniform'", - "description": "Strategy used to define the widths of the bins.\n\nuniform\nThe bins have identical widths.\nquantile\nThe bins have the same number of samples and depend on `y_prob`." + "description": "Strategy used to define the widths of the bins.\n\nuniform\n The bins have identical widths.\nquantile\n The bins have the same number of samples and depend on `y_prob`." }, "type": { "kind": "EnumType", @@ -36401,7 +36401,7 @@ "docstring": { "type": "int, RandomState instance or None", "default_value": "None", - "description": "Pseudo-random number generator to control the starting state.\nUse an int for reproducible results across function calls.\nSee the :term:`Glossary `.\n\n.. versionadded:: 0.23\nthis parameter was previously hardcoded as 0." + "description": "Pseudo-random number generator to control the starting state.\nUse an int for reproducible results across function calls.\nSee the :term:`Glossary `.\n\n.. versionadded:: 0.23\n this parameter was previously hardcoded as 0." }, "type": { "kind": "UnionType", @@ -36858,7 +36858,7 @@ "docstring": { "type": "int, RandomState instance or None", "default_value": "None", - "description": "Pseudo-random number generator to control the starting state.\nUse an int for reproducible results across function calls.\nSee the :term:`Glossary `.\n\n.. versionadded:: 0.23\nthis parameter was previously hardcoded as 0." + "description": "Pseudo-random number generator to control the starting state.\nUse an int for reproducible results across function calls.\nSee the :term:`Glossary `.\n\n.. versionadded:: 0.23\n this parameter was previously hardcoded as 0." }, "type": { "kind": "UnionType", @@ -37045,11 +37045,11 @@ "docstring": { "type": "{'ward', 'complete', 'average', 'single'}", "default_value": "'ward'", - "description": "Which linkage criterion to use. The linkage criterion determines which\ndistance to use between sets of observation. The algorithm will merge\nthe pairs of cluster that minimize this criterion.\n\n- 'ward' minimizes the variance of the clusters being merged.\n- 'average' uses the average of the distances of each observation of\nthe two sets.\n- 'complete' or 'maximum' linkage uses the maximum distances between\nall observations of the two sets.\n- 'single' uses the minimum of the distances between all observations\nof the two sets.\n\n.. versionadded:: 0.20\nAdded the 'single' option" + "description": "Which linkage criterion to use. The linkage criterion determines which\ndistance to use between sets of observation. The algorithm will merge\nthe pairs of cluster that minimize this criterion.\n\n- 'ward' minimizes the variance of the clusters being merged.\n- 'average' uses the average of the distances of each observation of\n the two sets.\n- 'complete' or 'maximum' linkage uses the maximum distances between\n all observations of the two sets.\n- 'single' uses the minimum of the distances between all observations\n of the two sets.\n\n.. versionadded:: 0.20\n Added the 'single' option" }, "type": { "kind": "EnumType", - "values": ["ward", "average", "single", "complete"] + "values": ["complete", "average", "single", "ward"] } }, { @@ -37419,11 +37419,11 @@ "docstring": { "type": "{\"ward\", \"complete\", \"average\", \"single\"}", "default_value": "\"ward\"", - "description": "Which linkage criterion to use. 
The linkage criterion determines which\ndistance to use between sets of features. The algorithm will merge\nthe pairs of cluster that minimize this criterion.\n\n- \"ward\" minimizes the variance of the clusters being merged.\n- \"complete\" or maximum linkage uses the maximum distances between\nall features of the two sets.\n- \"average\" uses the average of the distances of each feature of\nthe two sets.\n- \"single\" uses the minimum of the distances between all features\nof the two sets." + "description": "Which linkage criterion to use. The linkage criterion determines which\ndistance to use between sets of features. The algorithm will merge\nthe pairs of cluster that minimize this criterion.\n\n- \"ward\" minimizes the variance of the clusters being merged.\n- \"complete\" or maximum linkage uses the maximum distances between\n all features of the two sets.\n- \"average\" uses the average of the distances of each feature of\n the two sets.\n- \"single\" uses the minimum of the distances between all features\n of the two sets." }, "type": { "kind": "EnumType", - "values": ["ward", "average", "single", "complete"] + "values": ["complete", "average", "single", "ward"] } }, { @@ -37996,11 +37996,11 @@ "docstring": { "type": "{\"average\", \"complete\", \"single\"}", "default_value": "\"complete\"", - "description": "Which linkage criteria to use. The linkage criterion determines which\ndistance to use between sets of observation.\n- \"average\" uses the average of the distances of each observation of\nthe two sets.\n- \"complete\" or maximum linkage uses the maximum distances between\nall observations of the two sets.\n- \"single\" uses the minimum of the distances between all\nobservations of the two sets." + "description": "Which linkage criteria to use. The linkage criterion determines which\ndistance to use between sets of observation.\n - \"average\" uses the average of the distances of each observation of\n the two sets.\n - \"complete\" or maximum linkage uses the maximum distances between\n all observations of the two sets.\n - \"single\" uses the minimum of the distances between all\n observations of the two sets." }, "type": { "kind": "EnumType", - "values": ["average", "single", "complete"] + "values": ["complete", "average", "single"] } }, { @@ -38568,11 +38568,11 @@ "docstring": { "type": "{'bistochastic', 'scale', 'log'}", "default_value": "'bistochastic'", - "description": "Method of normalizing and converting singular vectors into\nbiclusters. May be one of 'scale', 'bistochastic', or 'log'.\nThe authors recommend using 'log'. If the data is sparse,\nhowever, log normalization will not work, which is why the\ndefault is 'bistochastic'.\n\n.. warning::\nif `method='log'`, the data must be sparse." + "description": "Method of normalizing and converting singular vectors into\nbiclusters. May be one of 'scale', 'bistochastic', or 'log'.\nThe authors recommend using 'log'. If the data is sparse,\nhowever, log normalization will not work, which is why the\ndefault is 'bistochastic'.\n\n.. warning::\n if `method='log'`, the data must be sparse." 
}, "type": { "kind": "EnumType", - "values": ["scale", "log", "bistochastic"] + "values": ["log", "bistochastic", "scale"] } }, { @@ -38623,7 +38623,7 @@ }, "type": { "kind": "EnumType", - "values": ["randomized", "arpack"] + "values": ["arpack", "randomized"] } }, { @@ -38677,7 +38677,7 @@ "types": [ { "kind": "EnumType", - "values": ["k-means++", "random"] + "values": ["random", "k-means++"] }, { "kind": "NamedType", @@ -39007,7 +39007,7 @@ }, "type": { "kind": "EnumType", - "values": ["randomized", "arpack"] + "values": ["arpack", "randomized"] } }, { @@ -39379,7 +39379,7 @@ "docstring": { "type": "int, instance of sklearn.cluster model", "default_value": "3", - "description": "Number of clusters after the final clustering step, which treats the\nsubclusters from the leaves as new samples.\n\n- `None` : the final clustering step is not performed and the\nsubclusters are returned as they are.\n\n- :mod:`sklearn.cluster` Estimator : If a model is provided, the model\nis fit treating the subclusters as new samples and the initial data\nis mapped to the label of the closest subcluster.\n\n- `int` : the model fit is :class:`AgglomerativeClustering` with\n`n_clusters` set to be equal to the int." + "description": "Number of clusters after the final clustering step, which treats the\nsubclusters from the leaves as new samples.\n\n- `None` : the final clustering step is not performed and the\n subclusters are returned as they are.\n\n- :mod:`sklearn.cluster` Estimator : If a model is provided, the model\n is fit treating the subclusters as new samples and the initial data\n is mapped to the label of the closest subcluster.\n\n- `int` : the model fit is :class:`AgglomerativeClustering` with\n `n_clusters` set to be equal to the int." }, "type": { "kind": "UnionType", @@ -40433,7 +40433,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "The node has to be split if there is no place for a new subcluster\nin the node.\n1. Two empty nodes and two empty subclusters are initialized.\n2. The pair of distant subclusters are found.\n3. The properties of the empty subclusters and nodes are updated\naccording to the nearest distance between the subclusters to the\npair of distant subclusters.\n4. The two nodes are set as children to the two subclusters.", + "description": "The node has to be split if there is no place for a new subcluster\nin the node.\n1. Two empty nodes and two empty subclusters are initialized.\n2. The pair of distant subclusters are found.\n3. The properties of the empty subclusters and nodes are updated\n according to the nearest distance between the subclusters to the\n pair of distant subclusters.\n4. The two nodes are set as children to the two subclusters.", "docstring": "The node has to be split if there is no place for a new subcluster\nin the node.\n1. Two empty nodes and two empty subclusters are initialized.\n2. The pair of distant subclusters are found.\n3. The properties of the empty subclusters and nodes are updated\n according to the nearest distance between the subclusters to the\n pair of distant subclusters.\n4. The two nodes are set as children to the two subclusters." 
}, { @@ -40490,7 +40490,7 @@ "types": [ { "kind": "EnumType", - "values": ["k-means++", "random"] + "values": ["random", "k-means++"] }, { "kind": "NamedType", @@ -40628,7 +40628,7 @@ }, "type": { "kind": "EnumType", - "values": ["lloyd", "elkan"] + "values": ["elkan", "lloyd"] } }, { @@ -40641,11 +40641,11 @@ "docstring": { "type": "{\"biggest_inertia\", \"largest_cluster\"}", "default_value": "\"biggest_inertia\"", - "description": "Defines how bisection should be performed:\n\n- \"biggest_inertia\" means that BisectingKMeans will always check\nall calculated cluster for cluster with biggest SSE\n(Sum of squared errors) and bisect it. This approach concentrates on\nprecision, but may be costly in terms of execution time (especially for\nlarger amount of data points).\n\n- \"largest_cluster\" - BisectingKMeans will always split cluster with\nlargest amount of points assigned to it from all clusters\npreviously calculated. That should work faster than picking by SSE\n('biggest_inertia') and may produce similar results in most cases." + "description": "Defines how bisection should be performed:\n\n - \"biggest_inertia\" means that BisectingKMeans will always check\n all calculated cluster for cluster with biggest SSE\n (Sum of squared errors) and bisect it. This approach concentrates on\n precision, but may be costly in terms of execution time (especially for\n larger amount of data points).\n\n - \"largest_cluster\" - BisectingKMeans will always split cluster with\n largest amount of points assigned to it from all clusters\n previously calculated. That should work faster than picking by SSE\n ('biggest_inertia') and may produce similar results in most cases." }, "type": { "kind": "EnumType", - "values": ["biggest_inertia", "largest_cluster"] + "values": ["largest_cluster", "biggest_inertia"] } } ], @@ -41106,7 +41106,7 @@ "docstring": { "type": "{array-like, sparse matrix} of shape (n_samples, n_features)", "default_value": "", - "description": "Training instances to cluster.\n\n.. note:: The data will be converted to C ordering,\nwhich will cause a memory copy\nif the given data is not C-contiguous." + "description": "Training instances to cluster.\n\n.. note:: The data will be converted to C ordering,\n which will cause a memory copy\n if the given data is not C-contiguous." }, "type": { "kind": "UnionType", @@ -41472,7 +41472,7 @@ "docstring": { "type": "str, or callable", "default_value": "'euclidean'", - "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string or callable, it must be one of\nthe options allowed by :func:`sklearn.metrics.pairwise_distances` for\nits metric parameter.\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square. X may be a :term:`sparse graph`, in which\ncase only \"nonzero\" elements may be considered neighbors for DBSCAN.\n\n.. versionadded:: 0.17\nmetric *precomputed* to accept precomputed sparse matrix." + "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string or callable, it must be one of\nthe options allowed by :func:`sklearn.metrics.pairwise_distances` for\nits metric parameter.\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square. X may be a :term:`sparse graph`, in which\ncase only \"nonzero\" elements may be considered neighbors for DBSCAN.\n\n.. versionadded:: 0.17\n metric *precomputed* to accept precomputed sparse matrix." 
}, "type": { "kind": "UnionType", @@ -41519,7 +41519,7 @@ }, "type": { "kind": "EnumType", - "values": ["ball_tree", "auto", "kd_tree", "brute"] + "values": ["brute", "kd_tree", "ball_tree", "auto"] } }, { @@ -41912,7 +41912,7 @@ }, "type": { "kind": "EnumType", - "values": ["ball_tree", "auto", "kd_tree", "brute"] + "values": ["brute", "kd_tree", "ball_tree", "auto"] } }, { @@ -42132,7 +42132,7 @@ "types": [ { "kind": "EnumType", - "values": ["k-means++", "random"] + "values": ["random", "k-means++"] }, { "kind": "NamedType", @@ -42270,11 +42270,11 @@ "docstring": { "type": "{\"lloyd\", \"elkan\", \"auto\", \"full\"}", "default_value": "\"lloyd\"", - "description": "K-means algorithm to use. The classical EM-style algorithm is `\"lloyd\"`.\nThe `\"elkan\"` variation can be more efficient on some datasets with\nwell-defined clusters, by using the triangle inequality. However it's\nmore memory intensive due to the allocation of an extra array of shape\n`(n_samples, n_clusters)`.\n\n`\"auto\"` and `\"full\"` are deprecated and they will be removed in\nScikit-Learn 1.3. They are both aliases for `\"lloyd\"`.\n\n.. versionchanged:: 0.18\nAdded Elkan algorithm\n\n.. versionchanged:: 1.1\nRenamed \"full\" to \"lloyd\", and deprecated \"auto\" and \"full\".\nChanged \"auto\" to use \"lloyd\" instead of \"elkan\"." + "description": "K-means algorithm to use. The classical EM-style algorithm is `\"lloyd\"`.\nThe `\"elkan\"` variation can be more efficient on some datasets with\nwell-defined clusters, by using the triangle inequality. However it's\nmore memory intensive due to the allocation of an extra array of shape\n`(n_samples, n_clusters)`.\n\n`\"auto\"` and `\"full\"` are deprecated and they will be removed in\nScikit-Learn 1.3. They are both aliases for `\"lloyd\"`.\n\n.. versionchanged:: 0.18\n Added Elkan algorithm\n\n.. versionchanged:: 1.1\n Renamed \"full\" to \"lloyd\", and deprecated \"auto\" and \"full\".\n Changed \"auto\" to use \"lloyd\" instead of \"elkan\"." }, "type": { "kind": "EnumType", - "values": ["full", "auto", "lloyd", "elkan"] + "values": ["elkan", "full", "lloyd", "auto"] } } ], @@ -42507,7 +42507,7 @@ "types": [ { "kind": "EnumType", - "values": ["k-means++", "random"] + "values": ["random", "k-means++"] }, { "kind": "NamedType", @@ -42547,7 +42547,7 @@ "docstring": { "type": "int", "default_value": "1024", - "description": "Size of the mini batches.\nFor faster computations, you can set the ``batch_size`` greater than\n256 * number of cores to enable parallelism on all cores.\n\n.. versionchanged:: 1.0\n`batch_size` default changed from 100 to 1024." + "description": "Size of the mini batches.\nFor faster computations, you can set the ``batch_size`` greater than\n256 * number of cores to enable parallelism on all cores.\n\n.. versionchanged:: 1.0\n `batch_size` default changed from 100 to 1024." }, "type": { "kind": "NamedType", @@ -43432,7 +43432,7 @@ "types": [ { "kind": "EnumType", - "values": ["k-means++", "random"] + "values": ["random", "k-means++"] }, { "kind": "NamedType", @@ -45021,14 +45021,14 @@ "docstring": { "type": "{'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features)", "default_value": "'k-means++'", - "description": "Method for initialization:\n\n- `'k-means++'` : selects initial cluster centers for k-mean\nclustering in a smart way to speed up convergence. 
See section\nNotes in k_init for more details.\n- `'random'`: choose `n_clusters` observations (rows) at random from data\nfor the initial centroids.\n- If an array is passed, it should be of shape `(n_clusters, n_features)`\nand gives the initial centers.\n- If a callable is passed, it should take arguments `X`, `n_clusters` and a\nrandom state and return an initialization." + "description": "Method for initialization:\n\n- `'k-means++'` : selects initial cluster centers for k-mean\n clustering in a smart way to speed up convergence. See section\n Notes in k_init for more details.\n- `'random'`: choose `n_clusters` observations (rows) at random from data\n for the initial centroids.\n- If an array is passed, it should be of shape `(n_clusters, n_features)`\n and gives the initial centers.\n- If a callable is passed, it should take arguments `X`, `n_clusters` and a\n random state and return an initialization." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["k-means++", "random"] + "values": ["random", "k-means++"] }, { "kind": "NamedType", @@ -45166,11 +45166,11 @@ "docstring": { "type": "{\"lloyd\", \"elkan\", \"auto\", \"full\"}", "default_value": "\"lloyd\"", - "description": "K-means algorithm to use. The classical EM-style algorithm is `\"lloyd\"`.\nThe `\"elkan\"` variation can be more efficient on some datasets with\nwell-defined clusters, by using the triangle inequality. However it's\nmore memory intensive due to the allocation of an extra array of shape\n`(n_samples, n_clusters)`.\n\n`\"auto\"` and `\"full\"` are deprecated and they will be removed in\nScikit-Learn 1.3. They are both aliases for `\"lloyd\"`.\n\n.. versionchanged:: 0.18\nAdded Elkan algorithm\n\n.. versionchanged:: 1.1\nRenamed \"full\" to \"lloyd\", and deprecated \"auto\" and \"full\".\nChanged \"auto\" to use \"lloyd\" instead of \"elkan\"." + "description": "K-means algorithm to use. The classical EM-style algorithm is `\"lloyd\"`.\nThe `\"elkan\"` variation can be more efficient on some datasets with\nwell-defined clusters, by using the triangle inequality. However it's\nmore memory intensive due to the allocation of an extra array of shape\n`(n_samples, n_clusters)`.\n\n`\"auto\"` and `\"full\"` are deprecated and they will be removed in\nScikit-Learn 1.3. They are both aliases for `\"lloyd\"`.\n\n.. versionchanged:: 0.18\n Added Elkan algorithm\n\n.. versionchanged:: 1.1\n Renamed \"full\" to \"lloyd\", and deprecated \"auto\" and \"full\".\n Changed \"auto\" to use \"lloyd\" instead of \"elkan\"." }, "type": { "kind": "EnumType", - "values": ["full", "auto", "lloyd", "elkan"] + "values": ["elkan", "full", "lloyd", "auto"] } }, { @@ -45948,7 +45948,7 @@ "docstring": { "type": "int", "default_value": "None", - "description": "The number of jobs to use for the computation. This works by computing\neach of the n_init runs in parallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionadded:: 0.17\nParallel Execution using *n_jobs*." + "description": "The number of jobs to use for the computation. This works by computing\neach of the n_init runs in parallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionadded:: 0.17\n Parallel Execution using *n_jobs*." 
}, "type": { "kind": "NamedType", @@ -46035,7 +46035,7 @@ "docstring": { "type": "str or callable", "default_value": "'minkowski'", - "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string. If metric is\n\"precomputed\", `X` is assumed to be a distance matrix and must be\nsquare.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." + "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string. If metric is\n\"precomputed\", `X` is assumed to be a distance matrix and must be\nsquare.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." }, "type": { "kind": "UnionType", @@ -46189,11 +46189,11 @@ "docstring": { "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", "default_value": "'auto'", - "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`.\n- 'kd_tree' will use :class:`KDTree`.\n- 'brute' will use a brute-force search.\n- 'auto' (default) will attempt to decide the most appropriate\nalgorithm based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`.\n- 'kd_tree' will use :class:`KDTree`.\n- 'brute' will use a brute-force search.\n- 'auto' (default) will attempt to decide the most appropriate\n algorithm based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." }, "type": { "kind": "EnumType", - "values": ["ball_tree", "auto", "kd_tree", "brute"] + "values": ["brute", "kd_tree", "ball_tree", "auto"] } }, { @@ -46491,7 +46491,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Correct for predecessors.\n\nApplies Algorithm 2 of [1]_.\n\nInput parameters are ordered by the computer OPTICS ordering.\n\n.. 
[1] Schubert, Erich, Michael Gertz.\n\"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\nthe Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329.", + "description": "Correct for predecessors.\n\nApplies Algorithm 2 of [1]_.\n\nInput parameters are ordered by the computer OPTICS ordering.\n\n.. [1] Schubert, Erich, Michael Gertz.\n \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329.", "docstring": "Correct for predecessors.\n\nApplies Algorithm 2 of [1]_.\n\nInput parameters are ordered by the computer OPTICS ordering.\n\n.. [1] Schubert, Erich, Michael Gertz.\n \"Improving the Cluster Structure Extracted from OPTICS Plots.\" Proc. of\n the Conference \"Lernen, Wissen, Daten, Analysen\" (LWDA) (2018): 318-329." }, { @@ -46590,7 +46590,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Extend the area until it's maximal.\n\nIt's the same function for both upward and downward reagions, depending on\nthe given input parameters. Assuming:\n\n- steep_{upward/downward}: bool array indicating whether a point is a\nsteep {upward/downward};\n- upward/downward: bool array indicating whether a point is\nupward/downward;\n\nTo extend an upward reagion, ``steep_point=steep_upward`` and\n``xward_point=downward`` are expected, and to extend a downward region,\n``steep_point=steep_downward`` and ``xward_point=upward``.", + "description": "Extend the area until it's maximal.\n\nIt's the same function for both upward and downward reagions, depending on\nthe given input parameters. Assuming:\n\n - steep_{upward/downward}: bool array indicating whether a point is a\n steep {upward/downward};\n - upward/downward: bool array indicating whether a point is\n upward/downward;\n\nTo extend an upward reagion, ``steep_point=steep_upward`` and\n``xward_point=downward`` are expected, and to extend a downward region,\n``steep_point=steep_downward`` and ``xward_point=upward``.", "docstring": "Extend the area until it's maximal.\n\nIt's the same function for both upward and downward reagions, depending on\nthe given input parameters. Assuming:\n\n - steep_{upward/downward}: bool array indicating whether a point is a\n steep {upward/downward};\n - upward/downward: bool array indicating whether a point is\n upward/downward;\n\nTo extend an upward reagion, ``steep_point=steep_upward`` and\n``xward_point=downward`` are expected, and to extend a downward region,\n``steep_point=steep_downward`` and ``xward_point=upward``.\n\nParameters\n----------\nsteep_point : ndarray of shape (n_samples,), dtype=bool\n True if the point is steep downward (upward).\n\nxward_point : ndarray of shape (n_samples,), dtype=bool\n True if the point is an upward (respectively downward) point.\n\nstart : int\n The start of the xward region.\n\nmin_samples : int\n The same as the min_samples given to OPTICS. Up and down steep\n regions can't have more then ``min_samples`` consecutive non-steep\n points.\n\nReturns\n-------\nindex : int\n The current index iterating over all the samples, i.e. where we are up\n to in our search.\n\nend : int\n The end of the region, which can be behind the index. The region\n includes the ``end`` index." }, { @@ -47385,7 +47385,7 @@ "docstring": { "type": "str or callable", "default_value": "'minkowski'", - "description": "Metric to use for distance computation. 
Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string. If metric is\n\"precomputed\", X is assumed to be a distance matrix and must be square.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." + "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string. If metric is\n\"precomputed\", X is assumed to be a distance matrix and must be square.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." }, "type": { "kind": "UnionType", @@ -47445,11 +47445,11 @@ "docstring": { "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", "default_value": "'auto'", - "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`.\n- 'kd_tree' will use :class:`KDTree`.\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\nbased on the values passed to :meth:`fit` method. (default)\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`.\n- 'kd_tree' will use :class:`KDTree`.\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method. (default)\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." }, "type": { "kind": "EnumType", - "values": ["ball_tree", "auto", "kd_tree", "brute"] + "values": ["brute", "kd_tree", "ball_tree", "auto"] } }, { @@ -47544,7 +47544,7 @@ }, "type": { "kind": "EnumType", - "values": ["amg", "lobpcg", "arpack"] + "values": ["lobpcg", "amg", "arpack"] } }, { @@ -47574,7 +47574,7 @@ "docstring": { "type": "int, RandomState instance", "default_value": "None", - "description": "A pseudo random number generator used for the initialization\nof the lobpcg eigenvectors decomposition when `eigen_solver ==\n'amg'`, and for the K-Means initialization. 
Use an int to make\nthe results deterministic across calls (See\n:term:`Glossary `).\n\n.. note::\nWhen using `eigen_solver == 'amg'`,\nit is necessary to also fix the global numpy seed with\n`np.random.seed(int)` to get deterministic results. See\nhttps://github.com/pyamg/pyamg/issues/139 for further\ninformation." + "description": "A pseudo random number generator used for the initialization\nof the lobpcg eigenvectors decomposition when `eigen_solver ==\n'amg'`, and for the K-Means initialization. Use an int to make\nthe results deterministic across calls (See\n:term:`Glossary `).\n\n.. note::\n When using `eigen_solver == 'amg'`,\n it is necessary to also fix the global numpy seed with\n `np.random.seed(int)` to get deterministic results. See\n https://github.com/pyamg/pyamg/issues/139 for further\n information." }, "type": { "kind": "UnionType", @@ -47634,7 +47634,7 @@ "docstring": { "type": "str or callable", "default_value": "'rbf'", - "description": "How to construct the affinity matrix.\n- 'nearest_neighbors': construct the affinity matrix by computing a\ngraph of nearest neighbors.\n- 'rbf': construct the affinity matrix using a radial basis function\n(RBF) kernel.\n- 'precomputed': interpret ``X`` as a precomputed affinity matrix,\nwhere larger values indicate greater similarity between instances.\n- 'precomputed_nearest_neighbors': interpret ``X`` as a sparse graph\nof precomputed distances, and construct a binary affinity matrix\nfrom the ``n_neighbors`` nearest neighbors of each instance.\n- one of the kernels supported by\n:func:`~sklearn.metrics.pairwise_kernels`.\n\nOnly kernels that produce similarity scores (non-negative values that\nincrease with similarity) should be used. This property is not checked\nby the clustering algorithm." + "description": "How to construct the affinity matrix.\n - 'nearest_neighbors': construct the affinity matrix by computing a\n graph of nearest neighbors.\n - 'rbf': construct the affinity matrix using a radial basis function\n (RBF) kernel.\n - 'precomputed': interpret ``X`` as a precomputed affinity matrix,\n where larger values indicate greater similarity between instances.\n - 'precomputed_nearest_neighbors': interpret ``X`` as a sparse graph\n of precomputed distances, and construct a binary affinity matrix\n from the ``n_neighbors`` nearest neighbors of each instance.\n - one of the kernels supported by\n :func:`~sklearn.metrics.pairwise_kernels`.\n\nOnly kernels that produce similarity scores (non-negative values that\nincrease with similarity) should be used. This property is not checked\nby the clustering algorithm." }, "type": { "kind": "UnionType", @@ -47694,7 +47694,7 @@ "docstring": { "type": "{'kmeans', 'discretize', 'cluster_qr'}", "default_value": "'kmeans'", - "description": "The strategy for assigning labels in the embedding space. There are two\nways to assign labels after the Laplacian embedding. k-means is a\npopular choice, but it can be sensitive to initialization.\nDiscretization is another approach which is less sensitive to random\ninitialization [3]_.\nThe cluster_qr method [5]_ directly extract clusters from eigenvectors\nin spectral clustering. In contrast to k-means and discretization, cluster_qr\nhas no tuning parameters and runs no iterations, yet may outperform\nk-means and discretization in terms of both quality and speed.\n\n.. versionchanged:: 1.1\nAdded new labeling method 'cluster_qr'." + "description": "The strategy for assigning labels in the embedding space. 
There are two\nways to assign labels after the Laplacian embedding. k-means is a\npopular choice, but it can be sensitive to initialization.\nDiscretization is another approach which is less sensitive to random\ninitialization [3]_.\nThe cluster_qr method [5]_ directly extract clusters from eigenvectors\nin spectral clustering. In contrast to k-means and discretization, cluster_qr\nhas no tuning parameters and runs no iterations, yet may outperform\nk-means and discretization in terms of both quality and speed.\n\n.. versionchanged:: 1.1\n Added new labeling method 'cluster_qr'." }, "type": { "kind": "EnumType", @@ -47790,7 +47790,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex, or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster, such as when clusters are\nnested circles on the 2D plane.\n\nIf the affinity matrix is the adjacency matrix of a graph, this method\ncan be used to find normalized graph cuts [1]_, [2]_.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\na kernel function such the Gaussian (aka RBF) kernel with Euclidean\ndistance ``d(X, X)``::\n\nnp.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, a user-provided affinity matrix can be specified by\nsetting ``affinity='precomputed'``.\n\nRead more in the :ref:`User Guide `.", + "description": "Apply clustering to a projection of the normalized Laplacian.\n\nIn practice Spectral Clustering is very useful when the structure of\nthe individual clusters is highly non-convex, or more generally when\na measure of the center and spread of the cluster is not a suitable\ndescription of the complete cluster, such as when clusters are\nnested circles on the 2D plane.\n\nIf the affinity matrix is the adjacency matrix of a graph, this method\ncan be used to find normalized graph cuts [1]_, [2]_.\n\nWhen calling ``fit``, an affinity matrix is constructed using either\na kernel function such the Gaussian (aka RBF) kernel with Euclidean\ndistance ``d(X, X)``::\n\n np.exp(-gamma * d(X,X) ** 2)\n\nor a k-nearest neighbors connectivity matrix.\n\nAlternatively, a user-provided affinity matrix can be specified by\nsetting ``affinity='precomputed'``.\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -47996,7 +47996,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Find the discrete partition closest to the eigenvector embedding.\n\nThis implementation was proposed in [1]_.\n\n.. versionadded:: 1.1", + "description": "Find the discrete partition closest to the eigenvector embedding.\n\n This implementation was proposed in [1]_.\n\n.. versionadded:: 1.1", "docstring": "Find the discrete partition closest to the eigenvector embedding.\n\n This implementation was proposed in [1]_.\n\n.. versionadded:: 1.1\n\n Parameters\n ----------\n vectors : array-like, shape: (n_samples, n_clusters)\n The embedding space of the samples.\n\n Returns\n -------\n labels : array of integers, shape: n_samples\n The cluster labels of vectors.\n\n References\n ----------\n .. 
[1] :doi:`Simple, direct, and efficient multi-way spectral clustering, 2019\n Anil Damle, Victor Minden, Lexing Ying\n <10.1093/imaiai/iay008>`" }, { @@ -48122,7 +48122,7 @@ "docstring": { "type": "{array-like, sparse matrix} of shape (n_samples, n_samples)", "default_value": "", - "description": "The affinity matrix describing the relationship of the samples to\nembed. **Must be symmetric**.\n\nPossible examples:\n- adjacency matrix of a graph,\n- heat kernel of the pairwise distance matrix of the samples,\n- symmetric k-nearest neighbours connectivity matrix of the samples." + "description": "The affinity matrix describing the relationship of the samples to\nembed. **Must be symmetric**.\n\nPossible examples:\n - adjacency matrix of a graph,\n - heat kernel of the pairwise distance matrix of the samples,\n - symmetric k-nearest neighbours connectivity matrix of the samples." }, "type": { "kind": "UnionType", @@ -48186,7 +48186,7 @@ }, "type": { "kind": "EnumType", - "values": ["amg", "lobpcg", "arpack"] + "values": ["lobpcg", "amg", "arpack"] } }, { @@ -48199,7 +48199,7 @@ "docstring": { "type": "int, RandomState instance", "default_value": "None", - "description": "A pseudo random number generator used for the initialization\nof the lobpcg eigenvectors decomposition when `eigen_solver ==\n'amg'`, and for the K-Means initialization. Use an int to make\nthe results deterministic across calls (See\n:term:`Glossary `).\n\n.. note::\nWhen using `eigen_solver == 'amg'`,\nit is necessary to also fix the global numpy seed with\n`np.random.seed(int)` to get deterministic results. See\nhttps://github.com/pyamg/pyamg/issues/139 for further\ninformation." + "description": "A pseudo random number generator used for the initialization\nof the lobpcg eigenvectors decomposition when `eigen_solver ==\n'amg'`, and for the K-Means initialization. Use an int to make\nthe results deterministic across calls (See\n:term:`Glossary `).\n\n.. note::\n When using `eigen_solver == 'amg'`,\n it is necessary to also fix the global numpy seed with\n `np.random.seed(int)` to get deterministic results. See\n https://github.com/pyamg/pyamg/issues/139 for further\n information." }, "type": { "kind": "UnionType", @@ -48259,7 +48259,7 @@ "docstring": { "type": "{'kmeans', 'discretize', 'cluster_qr'}", "default_value": "'kmeans'", - "description": "The strategy to use to assign labels in the embedding\nspace. There are three ways to assign labels after the Laplacian\nembedding. k-means can be applied and is a popular choice. But it can\nalso be sensitive to initialization. Discretization is another\napproach which is less sensitive to random initialization [3]_.\nThe cluster_qr method [5]_ directly extracts clusters from eigenvectors\nin spectral clustering. In contrast to k-means and discretization, cluster_qr\nhas no tuning parameters and is not an iterative method, yet may outperform\nk-means and discretization in terms of both quality and speed.\n\n.. versionchanged:: 1.1\nAdded new labeling method 'cluster_qr'." + "description": "The strategy to use to assign labels in the embedding\nspace. There are three ways to assign labels after the Laplacian\nembedding. k-means can be applied and is a popular choice. But it can\nalso be sensitive to initialization. Discretization is another\napproach which is less sensitive to random initialization [3]_.\nThe cluster_qr method [5]_ directly extracts clusters from eigenvectors\nin spectral clustering. 
In contrast to k-means and discretization, cluster_qr\nhas no tuning parameters and is not an iterative method, yet may outperform\nk-means and discretization in terms of both quality and speed.\n\n.. versionchanged:: 1.1\n Added new labeling method 'cluster_qr'." }, "type": { "kind": "EnumType", @@ -48361,7 +48361,7 @@ "docstring": { "type": "list of tuples", "default_value": "", - "description": "List of (name, transformer, columns) tuples specifying the\ntransformer objects to be applied to subsets of the data.\n\nname : str\nLike in Pipeline and FeatureUnion, this allows the transformer and\nits parameters to be set using ``set_params`` and searched in grid\nsearch.\ntransformer : {'drop', 'passthrough'} or estimator\nEstimator must support :term:`fit` and :term:`transform`.\nSpecial-cased strings 'drop' and 'passthrough' are accepted as\nwell, to indicate to drop the columns or to pass them through\nuntransformed, respectively.\ncolumns : str, array-like of str, int, array-like of int, array-like of bool, slice or callable\nIndexes the data on its second axis. Integers are interpreted as\npositional columns, while strings can reference DataFrame columns\nby name. A scalar string or int should be used where\n``transformer`` expects X to be a 1d array-like (vector),\notherwise a 2d array will be passed to the transformer.\nA callable is passed the input data `X` and can return any of the\nabove. To select multiple columns by name or dtype, you can use\n:obj:`make_column_selector`." + "description": "List of (name, transformer, columns) tuples specifying the\ntransformer objects to be applied to subsets of the data.\n\nname : str\n Like in Pipeline and FeatureUnion, this allows the transformer and\n its parameters to be set using ``set_params`` and searched in grid\n search.\ntransformer : {'drop', 'passthrough'} or estimator\n Estimator must support :term:`fit` and :term:`transform`.\n Special-cased strings 'drop' and 'passthrough' are accepted as\n well, to indicate to drop the columns or to pass them through\n untransformed, respectively.\ncolumns : str, array-like of str, int, array-like of int, array-like of bool, slice or callable\n Indexes the data on its second axis. Integers are interpreted as\n positional columns, while strings can reference DataFrame columns\n by name. A scalar string or int should be used where\n ``transformer`` expects X to be a 1d array-like (vector),\n otherwise a 2d array will be passed to the transformer.\n A callable is passed the input data `X` and can return any of the\n above. To select multiple columns by name or dtype, you can use\n :obj:`make_column_selector`." }, "type": { "kind": "NamedType", @@ -48385,7 +48385,7 @@ "types": [ { "kind": "EnumType", - "values": ["passthrough", "drop"] + "values": ["drop", "passthrough"] }, { "kind": "NamedType", @@ -49383,7 +49383,7 @@ "docstring": { "type": "array-like of str or None", "default_value": "None", - "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\nused as feature names in. If `feature_names_in_` is not defined,\nthen the following input feature names are generated:\n`[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\nmatch `feature_names_in_` if `feature_names_in_` is defined." + "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\n used as feature names in. 
If `feature_names_in_` is not defined,\n then the following input feature names are generated:\n `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\n match `feature_names_in_` if `feature_names_in_` is defined." }, "type": { "kind": "UnionType", @@ -49811,7 +49811,7 @@ "docstring": { "type": "tuples", "default_value": "", - "description": "Tuples of the form (transformer, columns) specifying the\ntransformer objects to be applied to subsets of the data.\n\ntransformer : {'drop', 'passthrough'} or estimator\nEstimator must support :term:`fit` and :term:`transform`.\nSpecial-cased strings 'drop' and 'passthrough' are accepted as\nwell, to indicate to drop the columns or to pass them through\nuntransformed, respectively.\ncolumns : str, array-like of str, int, array-like of int, slice, array-like of bool or callable\nIndexes the data on its second axis. Integers are interpreted as\npositional columns, while strings can reference DataFrame columns\nby name. A scalar string or int should be used where\n``transformer`` expects X to be a 1d array-like (vector),\notherwise a 2d array will be passed to the transformer.\nA callable is passed the input data `X` and can return any of the\nabove. To select multiple columns by name or dtype, you can use\n:obj:`make_column_selector`." + "description": "Tuples of the form (transformer, columns) specifying the\ntransformer objects to be applied to subsets of the data.\n\ntransformer : {'drop', 'passthrough'} or estimator\n Estimator must support :term:`fit` and :term:`transform`.\n Special-cased strings 'drop' and 'passthrough' are accepted as\n well, to indicate to drop the columns or to pass them through\n untransformed, respectively.\ncolumns : str, array-like of str, int, array-like of int, slice, array-like of bool or callable\n Indexes the data on its second axis. Integers are interpreted as\n positional columns, while strings can reference DataFrame columns\n by name. A scalar string or int should be used where\n ``transformer`` expects X to be a 1d array-like (vector),\n otherwise a 2d array will be passed to the transformer.\n A callable is passed the input data `X` and can return any of the\n above. To select multiple columns by name or dtype, you can use\n :obj:`make_column_selector`." }, "type": { "kind": "NamedType", @@ -49835,7 +49835,7 @@ "types": [ { "kind": "EnumType", - "values": ["passthrough", "drop"] + "values": ["drop", "passthrough"] }, { "kind": "NamedType", @@ -50028,7 +50028,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Meta-estimator to regress on a transformed target.\n\nUseful for applying a non-linear transformation to the target `y` in\nregression problems. This transformation can be given as a Transformer\nsuch as the :class:`~sklearn.preprocessing.QuantileTransformer` or as a\nfunction and its inverse such as `np.log` and `np.exp`.\n\nThe computation during :meth:`fit` is::\n\nregressor.fit(X, func(y))\n\nor::\n\nregressor.fit(X, transformer.transform(y))\n\nThe computation during :meth:`predict` is::\n\ninverse_func(regressor.predict(X))\n\nor::\n\ntransformer.inverse_transform(regressor.predict(X))\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", + "description": "Meta-estimator to regress on a transformed target.\n\nUseful for applying a non-linear transformation to the target `y` in\nregression problems. 
This transformation can be given as a Transformer\nsuch as the :class:`~sklearn.preprocessing.QuantileTransformer` or as a\nfunction and its inverse such as `np.log` and `np.exp`.\n\nThe computation during :meth:`fit` is::\n\n regressor.fit(X, func(y))\n\nor::\n\n regressor.fit(X, transformer.transform(y))\n\nThe computation during :meth:`predict` is::\n\n inverse_func(regressor.predict(X))\n\nor::\n\n transformer.inverse_transform(regressor.predict(X))\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20", "docstring": "" }, { @@ -51479,7 +51479,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\nGraphLasso has been renamed to GraphicalLasso", + "description": "Sparse inverse covariance estimation with an l1-penalized estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n GraphLasso has been renamed to GraphicalLasso", "docstring": "" }, { @@ -51620,7 +51620,7 @@ "docstring": { "type": "int, cross-validation generator or iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.20\n``cv`` default value if None changed from 3-fold to 5-fold." + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.20\n ``cv`` default value if None changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -51718,7 +51718,7 @@ "docstring": { "type": "int", "default_value": "None", - "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionchanged:: v0.20\n`n_jobs` default changed from 1 to None" + "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None" }, "type": { "kind": "NamedType", @@ -51763,7 +51763,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\nGraphLassoCV has been renamed to GraphicalLassoCV", + "description": "Sparse inverse covariance w/ cross-validated choice of the l1 penalty.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionchanged:: v0.20\n GraphLassoCV has been renamed to GraphicalLassoCV", "docstring": "" }, { @@ -52315,7 +52315,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.covariance"], - "description": "L1-penalized covariance estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\ngraph_lasso has been renamed to graphical_lasso", + "description": "L1-penalized covariance estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n graph_lasso has been renamed to graphical_lasso", "docstring": "L1-penalized covariance estimator.\n\nRead more in the :ref:`User Guide `.\n\n.. versionchanged:: v0.20\n graph_lasso has been renamed to graphical_lasso\n\nParameters\n----------\nemp_cov : ndarray of shape (n_features, n_features)\n Empirical covariance from which to compute the covariance estimate.\n\nalpha : float\n The regularization parameter: the higher alpha, the more\n regularization, the sparser the inverse covariance.\n Range is (0, inf].\n\ncov_init : array of shape (n_features, n_features), default=None\n The initial guess for the covariance. If None, then the empirical\n covariance is used.\n\nmode : {'cd', 'lars'}, default='cd'\n The Lasso solver to use: coordinate descent or LARS. Use LARS for\n very sparse underlying graphs, where p > n. Elsewhere prefer cd\n which is more numerically stable.\n\ntol : float, default=1e-4\n The tolerance to declare convergence: if the dual gap goes below\n this value, iterations are stopped. Range is (0, inf].\n\nenet_tol : float, default=1e-4\n The tolerance for the elastic net solver used to calculate the descent\n direction. This parameter controls the accuracy of the search direction\n for a given column update, not of the overall parameter estimate. Only\n used for mode='cd'. Range is (0, inf].\n\nmax_iter : int, default=100\n The maximum number of iterations.\n\nverbose : bool, default=False\n If verbose is True, the objective function and dual gap are\n printed at each iteration.\n\nreturn_costs : bool, default=Flase\n If return_costs is True, the objective function and dual gap\n at each iteration are returned.\n\neps : float, default=eps\n The machine-precision regularization in the computation of the\n Cholesky diagonal factors. Increase this for very ill-conditioned\n systems. Default is `np.finfo(np.float64).eps`.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\nReturns\n-------\ncovariance : ndarray of shape (n_features, n_features)\n The estimated covariance matrix.\n\nprecision : ndarray of shape (n_features, n_features)\n The estimated (sparse) precision matrix.\n\ncosts : list of (objective, dual_gap) pairs\n The list of values of the objective function and the dual gap at\n each iteration. Returned only if return_costs is True.\n\nn_iter : int\n Number of iterations. Returned only if `return_n_iter` is set to True.\n\nSee Also\n--------\nGraphicalLasso : Sparse inverse covariance estimation\n with an l1-penalized estimator.\nGraphicalLassoCV : Sparse inverse covariance with\n cross-validated choice of the l1 penalty.\n\nNotes\n-----\nThe algorithm employed to solve this problem is the GLasso algorithm,\nfrom the Friedman 2008 Biostatistics paper. It is the same algorithm\nas in the R `glasso` package.\n\nOne possible difference with the `glasso` R package is that the\ndiagonal coefficients are not penalized." 
}, { @@ -53150,7 +53150,7 @@ "docstring": { "type": "int or tuple of shape (2,)", "default_value": "", - "description": "Number of different initial sets of observations from which to\nrun the algorithm. This parameter should be a strictly positive\ninteger.\nInstead of giving a number of trials to perform, one can provide a\nlist of initial estimates that will be used to iteratively run\nc_step procedures. In this case:\n- n_trials[0]: array-like, shape (n_trials, n_features)\nis the list of `n_trials` initial location estimates\n- n_trials[1]: array-like, shape (n_trials, n_features, n_features)\nis the list of `n_trials` initial covariances estimates" + "description": "Number of different initial sets of observations from which to\nrun the algorithm. This parameter should be a strictly positive\ninteger.\nInstead of giving a number of trials to perform, one can provide a\nlist of initial estimates that will be used to iteratively run\nc_step procedures. In this case:\n- n_trials[0]: array-like, shape (n_trials, n_features)\n is the list of `n_trials` initial location estimates\n- n_trials[1]: array-like, shape (n_trials, n_features, n_features)\n is the list of `n_trials` initial covariances estimates" }, "type": { "kind": "UnionType", @@ -54012,7 +54012,7 @@ }, "type": { "kind": "EnumType", - "values": ["nipals", "svd"] + "values": ["svd", "nipals"] } }, { @@ -55966,7 +55966,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.datasets"], - "description": "Load and return the Boston house-prices dataset (regression).\n\n============== ==============\nSamples total 506\nDimensionality 13\nFeatures real, positive\nTargets real 5. - 50.\n============== ==============\n\nRead more in the :ref:`User Guide `.\n\n.. warning::\nThe Boston housing prices dataset has an ethical problem: as\ninvestigated in [1]_, the authors of this dataset engineered a\nnon-invertible variable \"B\" assuming that racial self-segregation had a\npositive impact on house prices [2]_. Furthermore the goal of the\nresearch that led to the creation of this dataset was to study the\nimpact of air quality but it did not give adequate demonstration of the\nvalidity of this assumption.\n\nThe scikit-learn maintainers therefore strongly discourage the use of\nthis dataset unless the purpose of the code is to study and educate\nabout ethical issues in data science and machine learning.\n\nIn this special case, you can fetch the dataset from the original\nsource::\n\nimport pandas as pd # doctest: +SKIP\nimport numpy as np\n\ndata_url = \"http://lib.stat.cmu.edu/datasets/boston\"\nraw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\ndata = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\ntarget = raw_df.values[1::2, 2]\n\nAlternative datasets include the California housing dataset [3]_\n(i.e. :func:`~sklearn.datasets.fetch_california_housing`) and Ames\nhousing dataset [4]_. You can load the datasets as follows::\n\nfrom sklearn.datasets import fetch_california_housing\nhousing = fetch_california_housing()\n\nfor the California housing dataset and::\n\nfrom sklearn.datasets import fetch_openml\nhousing = fetch_openml(name=\"house_prices\", as_frame=True)\n\nfor the Ames housing dataset.", + "description": "Load and return the Boston house-prices dataset (regression).\n\n============== ==============\nSamples total 506\nDimensionality 13\nFeatures real, positive\nTargets real 5. - 50.\n============== ==============\n\nRead more in the :ref:`User Guide `.\n\n.. 
warning::\n The Boston housing prices dataset has an ethical problem: as\n investigated in [1]_, the authors of this dataset engineered a\n non-invertible variable \"B\" assuming that racial self-segregation had a\n positive impact on house prices [2]_. Furthermore the goal of the\n research that led to the creation of this dataset was to study the\n impact of air quality but it did not give adequate demonstration of the\n validity of this assumption.\n\n The scikit-learn maintainers therefore strongly discourage the use of\n this dataset unless the purpose of the code is to study and educate\n about ethical issues in data science and machine learning.\n\n In this special case, you can fetch the dataset from the original\n source::\n\n import pandas as pd # doctest: +SKIP\n import numpy as np\n\n data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n target = raw_df.values[1::2, 2]\n\n Alternative datasets include the California housing dataset [3]_\n (i.e. :func:`~sklearn.datasets.fetch_california_housing`) and Ames\n housing dataset [4]_. You can load the datasets as follows::\n\n from sklearn.datasets import fetch_california_housing\n housing = fetch_california_housing()\n\n for the California housing dataset and::\n\n from sklearn.datasets import fetch_openml\n housing = fetch_openml(name=\"house_prices\", as_frame=True)\n\n for the Ames housing dataset.", "docstring": "Load and return the Boston house-prices dataset (regression).\n\n============== ==============\nSamples total 506\nDimensionality 13\nFeatures real, positive\nTargets real 5. - 50.\n============== ==============\n\nRead more in the :ref:`User Guide `.\n\n.. warning::\n The Boston housing prices dataset has an ethical problem: as\n investigated in [1]_, the authors of this dataset engineered a\n non-invertible variable \"B\" assuming that racial self-segregation had a\n positive impact on house prices [2]_. Furthermore the goal of the\n research that led to the creation of this dataset was to study the\n impact of air quality but it did not give adequate demonstration of the\n validity of this assumption.\n\n The scikit-learn maintainers therefore strongly discourage the use of\n this dataset unless the purpose of the code is to study and educate\n about ethical issues in data science and machine learning.\n\n In this special case, you can fetch the dataset from the original\n source::\n\n import pandas as pd # doctest: +SKIP\n import numpy as np\n\n data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n target = raw_df.values[1::2, 2]\n\n Alternative datasets include the California housing dataset [3]_\n (i.e. :func:`~sklearn.datasets.fetch_california_housing`) and Ames\n housing dataset [4]_. You can load the datasets as follows::\n\n from sklearn.datasets import fetch_california_housing\n housing = fetch_california_housing()\n\n for the California housing dataset and::\n\n from sklearn.datasets import fetch_openml\n housing = fetch_openml(name=\"house_prices\", as_frame=True)\n\n for the Ames housing dataset.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. 
versionadded:: 0.18\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (506, 13)\n The data matrix.\n target : ndarray of shape (506,)\n The regression target.\n filename : str\n The physical location of boston csv dataset.\n\n .. versionadded:: 0.20\n\n DESCR : str\n The full description of the dataset.\n feature_names : ndarray\n The names of features\n\n(data, target) : tuple if ``return_X_y`` is True\n A tuple of two ndarrays. The first contains a 2D array of shape (506, 13)\n with each row representing one sample and each column representing the features.\n The second array of shape (506,) contains the target samples.\n\n .. versionadded:: 0.18\n\nNotes\n-----\n .. versionchanged:: 0.20\n Fixed a wrong data point at [445, 0].\n\nReferences\n----------\n.. [1] `Racist data destruction? M Carlisle,\n `_\n.. [2] `Harrison Jr, David, and Daniel L. Rubinfeld.\n \"Hedonic housing prices and the demand for clean air.\"\n Journal of environmental economics and management 5.1 (1978): 81-102.\n `_\n.. [3] `California housing dataset\n `_\n.. [4] `Ames housing dataset\n `_\n\nExamples\n--------\n>>> import warnings\n>>> from sklearn.datasets import load_boston\n>>> with warnings.catch_warnings():\n... # You should probably not use this dataset.\n... warnings.filterwarnings(\"ignore\")\n... X, y = load_boston(return_X_y=True)\n>>> print(X.shape)\n(506, 13)" }, { @@ -56232,7 +56232,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.datasets"], - "description": "Load and return the diabetes dataset (regression).\n\n============== ==================\nSamples total 442\nDimensionality 10\nFeatures real, -.2 < x < .2\nTargets integer 25 - 346\n============== ==================\n\n.. note::\nThe meaning of each feature (i.e. `feature_names`) might be unclear\n(especially for `ltg`) as the documentation of the original dataset is\nnot explicit. We provide information that seems correct in regard with\nthe scientific literature in this field of research.\n\nRead more in the :ref:`User Guide `.", + "description": "Load and return the diabetes dataset (regression).\n\n============== ==================\nSamples total 442\nDimensionality 10\nFeatures real, -.2 < x < .2\nTargets integer 25 - 346\n============== ==================\n\n.. note::\n The meaning of each feature (i.e. `feature_names`) might be unclear\n (especially for `ltg`) as the documentation of the original dataset is\n not explicit. We provide information that seems correct in regard with\n the scientific literature in this field of research.\n\nRead more in the :ref:`User Guide `.", "docstring": "Load and return the diabetes dataset (regression).\n\n============== ==================\nSamples total 442\nDimensionality 10\nFeatures real, -.2 < x < .2\nTargets integer 25 - 346\n============== ==================\n\n.. note::\n The meaning of each feature (i.e. `feature_names`) might be unclear\n (especially for `ltg`) as the documentation of the original dataset is\n not explicit. We provide information that seems correct in regard with\n the scientific literature in this field of research.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object.\n See below for more information about the `data` and `target` object.\n\n .. 
versionadded:: 0.18\n\nas_frame : bool, default=False\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric). The target is\n a pandas DataFrame or Series depending on the number of target columns.\n If `return_X_y` is True, then (`data`, `target`) will be pandas\n DataFrames or Series as described below.\n\n .. versionadded:: 0.23\n\nscaled : bool, default=True\n If True, the feature variables are mean centered and scaled by the\n standard deviation times the square root of `n_samples`.\n If False, raw data is returned for the feature variables.\n\n .. versionadded:: 1.1\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : {ndarray, dataframe} of shape (442, 10)\n The data matrix. If `as_frame=True`, `data` will be a pandas\n DataFrame.\n target: {ndarray, Series} of shape (442,)\n The regression target. If `as_frame=True`, `target` will be\n a pandas Series.\n feature_names: list\n The names of the dataset columns.\n frame: DataFrame of shape (442, 11)\n Only present when `as_frame=True`. DataFrame with `data` and\n `target`.\n\n .. versionadded:: 0.23\n DESCR: str\n The full description of the dataset.\n data_filename: str\n The path to the location of the data.\n target_filename: str\n The path to the location of the target.\n\n(data, target) : tuple if ``return_X_y`` is True\n Returns a tuple of two ndarray of shape (n_samples, n_features)\n A 2D array with each row representing one sample and each column\n representing the features and/or target of a given sample.\n\n .. versionadded:: 0.18" }, { @@ -56421,7 +56421,7 @@ }, "type": { "kind": "EnumType", - "values": ["strict", "replace", "ignore"] + "values": ["strict", "ignore", "replace"] } }, { @@ -56475,7 +56475,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.datasets"], - "description": "Load text files with categories as subfolder names.\n\nIndividual samples are assumed to be files stored a two levels folder\nstructure such as the following:\n\ncontainer_folder/\ncategory_1_folder/\nfile_1.txt\nfile_2.txt\n...\nfile_42.txt\ncategory_2_folder/\nfile_43.txt\nfile_44.txt\n...\n\nThe folder names are used as supervised signal label names. The individual\nfile names are not important.\n\nThis function does not try to extract features into a numpy array or scipy\nsparse matrix. In addition, if load_content is false it does not try to\nload the files in memory.\n\nTo use text files in a scikit-learn classification or clustering algorithm,\nyou will need to use the :mod`~sklearn.feature_extraction.text` module to\nbuild a feature extraction transformer that suits your problem.\n\nIf you set load_content=True, you should also specify the encoding of the\ntext using the 'encoding' parameter. For many modern text files, 'utf-8'\nwill be the correct encoding. If you leave encoding equal to None, then the\ncontent will be made of bytes instead of Unicode, and you will not be able\nto use most functions in :mod:`~sklearn.feature_extraction.text`.\n\nSimilar feature extractors should be built for other kind of unstructured\ndata input such as images, audio, video, ...\n\nIf you want files with a specific file extension (e.g. 
`.txt`) then you\ncan pass a list of those file extensions to `allowed_extensions`.\n\nRead more in the :ref:`User Guide `.", + "description": "Load text files with categories as subfolder names.\n\nIndividual samples are assumed to be files stored a two levels folder\nstructure such as the following:\n\n container_folder/\n category_1_folder/\n file_1.txt\n file_2.txt\n ...\n file_42.txt\n category_2_folder/\n file_43.txt\n file_44.txt\n ...\n\nThe folder names are used as supervised signal label names. The individual\nfile names are not important.\n\nThis function does not try to extract features into a numpy array or scipy\nsparse matrix. In addition, if load_content is false it does not try to\nload the files in memory.\n\nTo use text files in a scikit-learn classification or clustering algorithm,\nyou will need to use the :mod`~sklearn.feature_extraction.text` module to\nbuild a feature extraction transformer that suits your problem.\n\nIf you set load_content=True, you should also specify the encoding of the\ntext using the 'encoding' parameter. For many modern text files, 'utf-8'\nwill be the correct encoding. If you leave encoding equal to None, then the\ncontent will be made of bytes instead of Unicode, and you will not be able\nto use most functions in :mod:`~sklearn.feature_extraction.text`.\n\nSimilar feature extractors should be built for other kind of unstructured\ndata input such as images, audio, video, ...\n\nIf you want files with a specific file extension (e.g. `.txt`) then you\ncan pass a list of those file extensions to `allowed_extensions`.\n\nRead more in the :ref:`User Guide `.", "docstring": "Load text files with categories as subfolder names.\n\nIndividual samples are assumed to be files stored a two levels folder\nstructure such as the following:\n\n container_folder/\n category_1_folder/\n file_1.txt\n file_2.txt\n ...\n file_42.txt\n category_2_folder/\n file_43.txt\n file_44.txt\n ...\n\nThe folder names are used as supervised signal label names. The individual\nfile names are not important.\n\nThis function does not try to extract features into a numpy array or scipy\nsparse matrix. In addition, if load_content is false it does not try to\nload the files in memory.\n\nTo use text files in a scikit-learn classification or clustering algorithm,\nyou will need to use the :mod`~sklearn.feature_extraction.text` module to\nbuild a feature extraction transformer that suits your problem.\n\nIf you set load_content=True, you should also specify the encoding of the\ntext using the 'encoding' parameter. For many modern text files, 'utf-8'\nwill be the correct encoding. If you leave encoding equal to None, then the\ncontent will be made of bytes instead of Unicode, and you will not be able\nto use most functions in :mod:`~sklearn.feature_extraction.text`.\n\nSimilar feature extractors should be built for other kind of unstructured\ndata input such as images, audio, video, ...\n\nIf you want files with a specific file extension (e.g. `.txt`) then you\ncan pass a list of those file extensions to `allowed_extensions`.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ncontainer_path : str\n Path to the main folder holding one subfolder per category.\n\ndescription : str, default=None\n A paragraph describing the characteristic of the dataset: its source,\n reference, etc.\n\ncategories : list of str, default=None\n If None (default), load all the categories. 
If not None, list of\n category names to load (other categories ignored).\n\nload_content : bool, default=True\n Whether to load or not the content of the different files. If true a\n 'data' attribute containing the text information is present in the data\n structure returned. If not, a filenames attribute gives the path to the\n files.\n\nshuffle : bool, default=True\n Whether or not to shuffle the data: might be important for models that\n make the assumption that the samples are independent and identically\n distributed (i.i.d.), such as stochastic gradient descent.\n\nencoding : str, default=None\n If None, do not try to decode the content of the files (e.g. for images\n or other non-text content). If not None, encoding to use to decode text\n files to Unicode if load_content is True.\n\ndecode_error : {'strict', 'ignore', 'replace'}, default='strict'\n Instruction on what to do if a byte sequence is given to analyze that\n contains characters not of the given `encoding`. Passed as keyword\n argument 'errors' to bytes.decode.\n\nrandom_state : int, RandomState instance or None, default=0\n Determines random number generation for dataset shuffling. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nallowed_extensions : list of str, default=None\n List of desired file extensions to filter the files to be loaded.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : list of str\n Only present when `load_content=True`.\n The raw text data to learn.\n target : ndarray\n The target labels (integer index).\n target_names : list\n The names of target classes.\n DESCR : str\n The full description of the dataset.\n filenames: ndarray\n The filenames holding the dataset." }, { @@ -57114,7 +57114,7 @@ }, "type": { "kind": "EnumType", - "values": ["SF", "SA", "smtp", "http"] + "values": ["smtp", "SA", "SF", "http"] } }, { @@ -57566,7 +57566,7 @@ }, "type": { "kind": "EnumType", - "values": ["test", "train", "10_folds"] + "values": ["10_folds", "train", "test"] } }, { @@ -57675,7 +57675,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.datasets"], - "description": "Load the Labeled Faces in the Wild (LFW) pairs dataset (classification).\n\nDownload it if necessary.\n\n================= =======================\nClasses 2\nSamples total 13233\nDimensionality 5828\nFeatures real, between 0 and 255\n================= =======================\n\nIn the official `README.txt`_ this task is described as the\n\"Restricted\" task. As I am not sure as to implement the\n\"Unrestricted\" variant correctly, I left it as unsupported for now.\n\n.. _`README.txt`: http://vis-www.cs.umass.edu/lfw/README.txt\n\nThe original images are 250 x 250 pixels, but the default slice and resize\narguments reduce them to 62 x 47.\n\nRead more in the :ref:`User Guide `.", + "description": "Load the Labeled Faces in the Wild (LFW) pairs dataset (classification).\n\nDownload it if necessary.\n\n================= =======================\nClasses 2\nSamples total 13233\nDimensionality 5828\nFeatures real, between 0 and 255\n================= =======================\n\nIn the official `README.txt`_ this task is described as the\n\"Restricted\" task. As I am not sure as to implement the\n\"Unrestricted\" variant correctly, I left it as unsupported for now.\n\n .. 
_`README.txt`: http://vis-www.cs.umass.edu/lfw/README.txt\n\nThe original images are 250 x 250 pixels, but the default slice and resize\narguments reduce them to 62 x 47.\n\nRead more in the :ref:`User Guide `.", "docstring": "Load the Labeled Faces in the Wild (LFW) pairs dataset (classification).\n\nDownload it if necessary.\n\n================= =======================\nClasses 2\nSamples total 13233\nDimensionality 5828\nFeatures real, between 0 and 255\n================= =======================\n\nIn the official `README.txt`_ this task is described as the\n\"Restricted\" task. As I am not sure as to implement the\n\"Unrestricted\" variant correctly, I left it as unsupported for now.\n\n .. _`README.txt`: http://vis-www.cs.umass.edu/lfw/README.txt\n\nThe original images are 250 x 250 pixels, but the default slice and resize\narguments reduce them to 62 x 47.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nsubset : {'train', 'test', '10_folds'}, default='train'\n Select the dataset to load: 'train' for the development training\n set, 'test' for the development test set, and '10_folds' for the\n official evaluation set that is meant to be used with a 10-folds\n cross validation.\n\ndata_home : str, default=None\n Specify another download and cache folder for the datasets. By\n default all scikit-learn data is stored in '~/scikit_learn_data'\n subfolders.\n\nfunneled : bool, default=True\n Download and use the funneled variant of the dataset.\n\nresize : float, default=0.5\n Ratio used to resize the each face picture.\n\ncolor : bool, default=False\n Keep the 3 RGB channels instead of averaging them to a single\n gray level channel. If color is True the shape of the data has\n one more dimension than the shape with color = False.\n\nslice_ : tuple of slice, default=(slice(70, 195), slice(78, 172))\n Provide a custom 2D slice (height, width) to extract the\n 'interesting' part of the jpeg files and avoid use statistical\n correlation from the background\n\ndownload_if_missing : bool, default=True\n If False, raise a IOError if the data is not locally available\n instead of trying to download the data from the source site.\n\nReturns\n-------\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : ndarray of shape (2200, 5828). Shape depends on ``subset``.\n Each row corresponds to 2 ravel'd face images\n of original size 62 x 47 pixels.\n Changing the ``slice_``, ``resize`` or ``subset`` parameters\n will change the shape of the output.\n pairs : ndarray of shape (2200, 2, 62, 47). Shape depends on ``subset``\n Each row has 2 face images corresponding\n to same or different person from the dataset\n containing 5749 people. Changing the ``slice_``,\n ``resize`` or ``subset`` parameters will change the shape of the\n output.\n target : numpy array of shape (2200,). Shape depends on ``subset``.\n Labels associated to each pair of images.\n The two label values being different persons or the same person.\n DESCR : str\n Description of the Labeled Faces in the Wild (LFW) dataset." }, { @@ -59212,7 +59212,7 @@ "docstring": { "type": "bool or 'auto'", "default_value": "'auto'", - "description": "If True, the data is a pandas DataFrame including columns with\nappropriate dtypes (numeric, string or categorical). The target is\na pandas DataFrame or Series depending on the number of target_columns.\nThe Bunch will contain a ``frame`` attribute with the target and the\ndata. 
If ``return_X_y`` is True, then ``(data, target)`` will be pandas\nDataFrames or Series as describe above.\n\nIf as_frame is 'auto', the data and target will be converted to\nDataFrame or Series as if as_frame is set to True, unless the dataset\nis stored in sparse format.\n\n.. versionchanged:: 0.24\nThe default value of `as_frame` changed from `False` to `'auto'`\nin 0.24." + "description": "If True, the data is a pandas DataFrame including columns with\nappropriate dtypes (numeric, string or categorical). The target is\na pandas DataFrame or Series depending on the number of target_columns.\nThe Bunch will contain a ``frame`` attribute with the target and the\ndata. If ``return_X_y`` is True, then ``(data, target)`` will be pandas\nDataFrames or Series as describe above.\n\nIf as_frame is 'auto', the data and target will be converted to\nDataFrame or Series as if as_frame is set to True, unless the dataset\nis stored in sparse format.\n\n.. versionchanged:: 0.24\n The default value of `as_frame` changed from `False` to `'auto'`\n in 0.24." }, "type": { "kind": "UnionType", @@ -59266,7 +59266,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.datasets"], - "description": "Fetch dataset from openml by name or dataset id.\n\nDatasets are uniquely identified by either an integer ID or by a\ncombination of name and version (i.e. there might be multiple\nversions of the 'iris' dataset). Please give either name or data_id\n(not both). In case a name is given, a version can also be\nprovided.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\n.. note:: EXPERIMENTAL\n\nThe API is experimental (particularly the return value structure),\nand might have small backward-incompatible changes without notice\nor warning in future releases.", + "description": "Fetch dataset from openml by name or dataset id.\n\nDatasets are uniquely identified by either an integer ID or by a\ncombination of name and version (i.e. there might be multiple\nversions of the 'iris' dataset). Please give either name or data_id\n(not both). In case a name is given, a version can also be\nprovided.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\n.. note:: EXPERIMENTAL\n\n The API is experimental (particularly the return value structure),\n and might have small backward-incompatible changes without notice\n or warning in future releases.", "docstring": "Fetch dataset from openml by name or dataset id.\n\nDatasets are uniquely identified by either an integer ID or by a\ncombination of name and version (i.e. there might be multiple\nversions of the 'iris' dataset). Please give either name or data_id\n(not both). In case a name is given, a version can also be\nprovided.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n\n.. note:: EXPERIMENTAL\n\n The API is experimental (particularly the return value structure),\n and might have small backward-incompatible changes without notice\n or warning in future releases.\n\nParameters\n----------\nname : str, default=None\n String identifier of the dataset. Note that OpenML can have multiple\n datasets with the same name.\n\nversion : int or 'active', default='active'\n Version of the dataset. Can only be provided if also ``name`` is given.\n If 'active' the oldest version that's still active is used. 
Since\n there may be more than one active version of a dataset, and those\n versions may fundamentally be different from one another, setting an\n exact version is highly recommended.\n\ndata_id : int, default=None\n OpenML ID of the dataset. The most specific way of retrieving a\n dataset. If data_id is not given, name (and potential version) are\n used to obtain a dataset.\n\ndata_home : str, default=None\n Specify another download and cache folder for the data sets. By default\n all scikit-learn data is stored in '~/scikit_learn_data' subfolders.\n\ntarget_column : str, list or None, default='default-target'\n Specify the column name in the data to use as target. If\n 'default-target', the standard target column a stored on the server\n is used. If ``None``, all columns are returned as data and the\n target is ``None``. If list (of strings), all columns with these names\n are returned as multi-target (Note: not all scikit-learn classifiers\n can handle all types of multi-output combinations).\n\ncache : bool, default=True\n Whether to cache the downloaded datasets into `data_home`.\n\nreturn_X_y : bool, default=False\n If True, returns ``(data, target)`` instead of a Bunch object. See\n below for more information about the `data` and `target` objects.\n\nas_frame : bool or 'auto', default='auto'\n If True, the data is a pandas DataFrame including columns with\n appropriate dtypes (numeric, string or categorical). The target is\n a pandas DataFrame or Series depending on the number of target_columns.\n The Bunch will contain a ``frame`` attribute with the target and the\n data. If ``return_X_y`` is True, then ``(data, target)`` will be pandas\n DataFrames or Series as describe above.\n\n If as_frame is 'auto', the data and target will be converted to\n DataFrame or Series as if as_frame is set to True, unless the dataset\n is stored in sparse format.\n\n .. versionchanged:: 0.24\n The default value of `as_frame` changed from `False` to `'auto'`\n in 0.24.\n\nn_retries : int, default=3\n Number of retries when HTTP errors or network timeouts are encountered.\n Error with status code 412 won't be retried as they represent OpenML\n generic errors.\n\ndelay : float, default=1.0\n Number of seconds between retries.\n\nReturns\n-------\n\ndata : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n data : np.array, scipy.sparse.csr_matrix of floats, or pandas DataFrame\n The feature matrix. Categorical features are encoded as ordinals.\n target : np.array, pandas Series or DataFrame\n The regression target or classification labels, if applicable.\n Dtype is float if numeric, and object if categorical. If\n ``as_frame`` is True, ``target`` is a pandas object.\n DESCR : str\n The full description of the dataset.\n feature_names : list\n The names of the dataset columns.\n target_names: list\n The names of the target columns.\n\n .. versionadded:: 0.22\n\n categories : dict or None\n Maps each categorical feature name to a list of values, such\n that the value encoded as i is ith in the list. If ``as_frame``\n is True, this is None.\n details : dict\n More metadata from OpenML.\n frame : pandas DataFrame\n Only present when `as_frame=True`. DataFrame with ``data`` and\n ``target``.\n\n(data, target) : tuple if ``return_X_y`` is True\n\n .. 
note:: EXPERIMENTAL\n\n This interface is **experimental** and subsequent releases may\n change attributes without notice (although there should only be\n minor changes to ``data`` and ``target``).\n\n Missing values in the 'data' are represented as NaN's. Missing values\n in 'target' are represented as NaN's (numerical target) or None\n (categorical target)." }, { @@ -59374,7 +59374,7 @@ }, "type": { "kind": "EnumType", - "values": ["test", "train", "all"] + "values": ["all", "train", "test"] } }, { @@ -59722,7 +59722,7 @@ "docstring": { "type": "int or array-like", "default_value": "100", - "description": "If int, it is the total number of points equally divided among\nclusters.\nIf array-like, each element of the sequence indicates\nthe number of samples per cluster.\n\n.. versionchanged:: v0.20\none can now pass an array-like to the ``n_samples`` parameter" + "description": "If int, it is the total number of points equally divided among\nclusters.\nIf array-like, each element of the sequence indicates\nthe number of samples per cluster.\n\n.. versionchanged:: v0.20\n one can now pass an array-like to the ``n_samples`` parameter" }, "type": { "kind": "UnionType", @@ -60069,7 +60069,7 @@ "docstring": { "type": "int or tuple of shape (2,), dtype=int", "default_value": "100", - "description": "If int, it is the total number of points generated.\nFor odd numbers, the inner circle will have one point more than the\nouter circle.\nIf two-element tuple, number of points in outer circle and inner\ncircle.\n\n.. versionchanged:: 0.23\nAdded two-element tuple." + "description": "If int, it is the total number of points generated.\nFor odd numbers, the inner circle will have one point more than the\nouter circle.\nIf two-element tuple, number of points in outer circle and inner\ncircle.\n\n.. versionchanged:: 0.23\n Added two-element tuple." }, "type": { "kind": "UnionType", @@ -60588,7 +60588,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.datasets"], - "description": "Generate the \"Friedman #1\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are independent features uniformly distributed on the interval\n[0, 1]. The output `y` is created according to the formula::\n\ny(X) = 10 * sin(pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - 0.5) ** 2 + 10 * X[:, 3] + 5 * X[:, 4] + noise * N(0, 1).\n\nOut of the `n_features` features, only 5 are actually used to compute\n`y`. The remaining features are independent of `y`.\n\nThe number of features has to be >= 5.\n\nRead more in the :ref:`User Guide `.", + "description": "Generate the \"Friedman #1\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are independent features uniformly distributed on the interval\n[0, 1]. The output `y` is created according to the formula::\n\n y(X) = 10 * sin(pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - 0.5) ** 2 + 10 * X[:, 3] + 5 * X[:, 4] + noise * N(0, 1).\n\nOut of the `n_features` features, only 5 are actually used to compute\n`y`. The remaining features are independent of `y`.\n\nThe number of features has to be >= 5.\n\nRead more in the :ref:`User Guide `.", "docstring": "Generate the \"Friedman #1\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are independent features uniformly distributed on the interval\n[0, 1]. 
The output `y` is created according to the formula::\n\n y(X) = 10 * sin(pi * X[:, 0] * X[:, 1]) + 20 * (X[:, 2] - 0.5) ** 2 + 10 * X[:, 3] + 5 * X[:, 4] + noise * N(0, 1).\n\nOut of the `n_features` features, only 5 are actually used to compute\n`y`. The remaining features are independent of `y`.\n\nThe number of features has to be >= 5.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=10\n The number of features. Should be at least 5.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] J. Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996." }, { @@ -60665,7 +60665,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.datasets"], - "description": "Generate the \"Friedman #2\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are 4 independent features uniformly distributed on the\nintervals::\n\n0 <= X[:, 0] <= 100,\n40 * pi <= X[:, 1] <= 560 * pi,\n0 <= X[:, 2] <= 1,\n1 <= X[:, 3] <= 11.\n\nThe output `y` is created according to the formula::\n\ny(X) = (X[:, 0] ** 2 + (X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) ** 2) ** 0.5 + noise * N(0, 1).\n\nRead more in the :ref:`User Guide `.", + "description": "Generate the \"Friedman #2\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are 4 independent features uniformly distributed on the\nintervals::\n\n 0 <= X[:, 0] <= 100,\n 40 * pi <= X[:, 1] <= 560 * pi,\n 0 <= X[:, 2] <= 1,\n 1 <= X[:, 3] <= 11.\n\nThe output `y` is created according to the formula::\n\n y(X) = (X[:, 0] ** 2 + (X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) ** 2) ** 0.5 + noise * N(0, 1).\n\nRead more in the :ref:`User Guide `.", "docstring": "Generate the \"Friedman #2\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are 4 independent features uniformly distributed on the\nintervals::\n\n 0 <= X[:, 0] <= 100,\n 40 * pi <= X[:, 1] <= 560 * pi,\n 0 <= X[:, 2] <= 1,\n 1 <= X[:, 3] <= 11.\n\nThe output `y` is created according to the formula::\n\n y(X) = (X[:, 0] ** 2 + (X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) ** 2) ** 0.5 + noise * N(0, 1).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 4)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] J. 
Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996." }, { @@ -60742,7 +60742,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.datasets"], - "description": "Generate the \"Friedman #3\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are 4 independent features uniformly distributed on the\nintervals::\n\n0 <= X[:, 0] <= 100,\n40 * pi <= X[:, 1] <= 560 * pi,\n0 <= X[:, 2] <= 1,\n1 <= X[:, 3] <= 11.\n\nThe output `y` is created according to the formula::\n\ny(X) = arctan((X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) / X[:, 0]) + noise * N(0, 1).\n\nRead more in the :ref:`User Guide `.", + "description": "Generate the \"Friedman #3\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are 4 independent features uniformly distributed on the\nintervals::\n\n 0 <= X[:, 0] <= 100,\n 40 * pi <= X[:, 1] <= 560 * pi,\n 0 <= X[:, 2] <= 1,\n 1 <= X[:, 3] <= 11.\n\nThe output `y` is created according to the formula::\n\n y(X) = arctan((X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) / X[:, 0]) + noise * N(0, 1).\n\nRead more in the :ref:`User Guide `.", "docstring": "Generate the \"Friedman #3\" regression problem.\n\nThis dataset is described in Friedman [1] and Breiman [2].\n\nInputs `X` are 4 independent features uniformly distributed on the\nintervals::\n\n 0 <= X[:, 0] <= 100,\n 40 * pi <= X[:, 1] <= 560 * pi,\n 0 <= X[:, 2] <= 1,\n 1 <= X[:, 3] <= 11.\n\nThe output `y` is created according to the formula::\n\n y(X) = arctan((X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) / X[:, 0]) + noise * N(0, 1).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nnoise : float, default=0.0\n The standard deviation of the gaussian noise applied to the output.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset noise. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 4)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] J. Friedman, \"Multivariate adaptive regression splines\", The Annals\n of Statistics 19 (1), pages 1-67, 1991.\n\n.. [2] L. Breiman, \"Bagging predictors\", Machine Learning 24,\n pages 123-140, 1996." }, { @@ -60947,7 +60947,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.datasets"], - "description": "Generate data for binary classification used in Hastie et al. 2009, Example 10.2.\n\nThe ten features are standard independent Gaussian and\nthe target ``y`` is defined by::\n\ny[i] = 1 if np.sum(X[i] ** 2) > 9.34 else -1\n\nRead more in the :ref:`User Guide `.", + "description": "Generate data for binary classification used in Hastie et al. 2009, Example 10.2.\n\nThe ten features are standard independent Gaussian and\nthe target ``y`` is defined by::\n\n y[i] = 1 if np.sum(X[i] ** 2) > 9.34 else -1\n\nRead more in the :ref:`User Guide `.", "docstring": "Generate data for binary classification used in Hastie et al. 
2009, Example 10.2.\n\nThe ten features are standard independent Gaussian and\nthe target ``y`` is defined by::\n\n y[i] = 1 if np.sum(X[i] ** 2) > 9.34 else -1\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=12000\n The number of samples.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, 10)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nSee Also\n--------\nmake_gaussian_quantiles : A generalization of this dataset approach.\n\nReferences\n----------\n.. [1] T. Hastie, R. Tibshirani and J. Friedman, \"Elements of Statistical\n Learning Ed. 2\", Springer, 2009." }, { @@ -61058,7 +61058,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.datasets"], - "description": "Generate a mostly low rank matrix with bell-shaped singular values.\n\nMost of the variance can be explained by a bell-shaped curve of width\neffective_rank: the low rank part of the singular values profile is::\n\n(1 - tail_strength) * exp(-1.0 * (i / effective_rank) ** 2)\n\nThe remaining singular values' tail is fat, decreasing as::\n\ntail_strength * exp(-0.1 * i / effective_rank).\n\nThe low rank part of the profile can be considered the structured\nsignal part of the data while the tail can be considered the noisy\npart of the data that cannot be summarized by a low number of linear\ncomponents (singular vectors).\n\nThis kind of singular profiles is often seen in practice, for instance:\n- gray level pictures of faces\n- TF-IDF vectors of text documents crawled from the web\n\nRead more in the :ref:`User Guide `.", + "description": "Generate a mostly low rank matrix with bell-shaped singular values.\n\nMost of the variance can be explained by a bell-shaped curve of width\neffective_rank: the low rank part of the singular values profile is::\n\n (1 - tail_strength) * exp(-1.0 * (i / effective_rank) ** 2)\n\nThe remaining singular values' tail is fat, decreasing as::\n\n tail_strength * exp(-0.1 * i / effective_rank).\n\nThe low rank part of the profile can be considered the structured\nsignal part of the data while the tail can be considered the noisy\npart of the data that cannot be summarized by a low number of linear\ncomponents (singular vectors).\n\nThis kind of singular profiles is often seen in practice, for instance:\n - gray level pictures of faces\n - TF-IDF vectors of text documents crawled from the web\n\nRead more in the :ref:`User Guide `.", "docstring": "Generate a mostly low rank matrix with bell-shaped singular values.\n\nMost of the variance can be explained by a bell-shaped curve of width\neffective_rank: the low rank part of the singular values profile is::\n\n (1 - tail_strength) * exp(-1.0 * (i / effective_rank) ** 2)\n\nThe remaining singular values' tail is fat, decreasing as::\n\n tail_strength * exp(-0.1 * i / effective_rank).\n\nThe low rank part of the profile can be considered the structured\nsignal part of the data while the tail can be considered the noisy\npart of the data that cannot be summarized by a low number of linear\ncomponents (singular vectors).\n\nThis kind of singular profiles is often seen in practice, for instance:\n - gray level pictures of faces\n - TF-IDF vectors of text documents crawled from the web\n\nRead more in the :ref:`User Guide 
`.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=100\n The number of features.\n\neffective_rank : int, default=10\n The approximate number of singular vectors required to explain most of\n the data by linear combinations.\n\ntail_strength : float, default=0.5\n The relative importance of the fat noisy tail of the singular values\n profile. The value should be between 0 and 1.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The matrix." }, { @@ -61077,7 +61077,7 @@ "docstring": { "type": "int or tuple of shape (2,), dtype=int", "default_value": "100", - "description": "If int, the total number of points generated.\nIf two-element tuple, number of points in each of two moons.\n\n.. versionchanged:: 0.23\nAdded two-element tuple." + "description": "If int, the total number of points generated.\nIf two-element tuple, number of points in each of two moons.\n\n.. versionchanged:: 0.23\n Added two-element tuple." }, "type": { "kind": "UnionType", @@ -61286,7 +61286,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "If ``True``, return a sparse feature matrix.\n\n.. versionadded:: 0.17\nparameter to allow *sparse* output." + "description": "If ``True``, return a sparse feature matrix.\n\n.. versionadded:: 0.17\n parameter to allow *sparse* output." }, "type": { "kind": "NamedType", @@ -61310,7 +61310,7 @@ "types": [ { "kind": "EnumType", - "values": ["dense", "sparse"] + "values": ["sparse", "dense"] }, { "kind": "NamedType", @@ -61370,7 +61370,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.datasets"], - "description": "Generate a random multilabel classification problem.\n\nFor each sample, the generative process is:\n- pick the number of labels: n ~ Poisson(n_labels)\n- n times, choose a class c: c ~ Multinomial(theta)\n- pick the document length: k ~ Poisson(length)\n- k times, choose a word: w ~ Multinomial(theta_c)\n\nIn the above process, rejection sampling is used to make sure that\nn is never zero or more than `n_classes`, and that the document length\nis never zero. Likewise, we reject classes which have already been chosen.\n\nRead more in the :ref:`User Guide `.", + "description": "Generate a random multilabel classification problem.\n\nFor each sample, the generative process is:\n - pick the number of labels: n ~ Poisson(n_labels)\n - n times, choose a class c: c ~ Multinomial(theta)\n - pick the document length: k ~ Poisson(length)\n - k times, choose a word: w ~ Multinomial(theta_c)\n\nIn the above process, rejection sampling is used to make sure that\nn is never zero or more than `n_classes`, and that the document length\nis never zero. Likewise, we reject classes which have already been chosen.\n\nRead more in the :ref:`User Guide `.", "docstring": "Generate a random multilabel classification problem.\n\nFor each sample, the generative process is:\n - pick the number of labels: n ~ Poisson(n_labels)\n - n times, choose a class c: c ~ Multinomial(theta)\n - pick the document length: k ~ Poisson(length)\n - k times, choose a word: w ~ Multinomial(theta_c)\n\nIn the above process, rejection sampling is used to make sure that\nn is never zero or more than `n_classes`, and that the document length\nis never zero. 
Likewise, we reject classes which have already been chosen.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=20\n The total number of features.\n\nn_classes : int, default=5\n The number of classes of the classification problem.\n\nn_labels : int, default=2\n The average number of labels per instance. More precisely, the number\n of labels per sample is drawn from a Poisson distribution with\n ``n_labels`` as its expected value, but samples are bounded (using\n rejection sampling) by ``n_classes``, and must be nonzero if\n ``allow_unlabeled`` is False.\n\nlength : int, default=50\n The sum of the features (number of words if documents) is drawn from\n a Poisson distribution with this expected value.\n\nallow_unlabeled : bool, default=True\n If ``True``, some instances might not belong to any class.\n\nsparse : bool, default=False\n If ``True``, return a sparse feature matrix.\n\n .. versionadded:: 0.17\n parameter to allow *sparse* output.\n\nreturn_indicator : {'dense', 'sparse'} or False, default='dense'\n If ``'dense'`` return ``Y`` in the dense binary indicator format. If\n ``'sparse'`` return ``Y`` in the sparse binary indicator format.\n ``False`` returns a list of lists of labels.\n\nreturn_distributions : bool, default=False\n If ``True``, return the prior class probability and conditional\n probabilities of features given classes, from which the data was\n drawn.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The generated samples.\n\nY : {ndarray, sparse matrix} of shape (n_samples, n_classes)\n The label sets. Sparse matrix should be of CSR format.\n\np_c : ndarray of shape (n_classes,)\n The probability of each class being drawn. Only returned if\n ``return_distributions=True``.\n\np_w_c : ndarray of shape (n_features, n_classes)\n The probability of each feature being drawn given each class.\n Only returned if ``return_distributions=True``." }, { @@ -61474,7 +61474,7 @@ "docstring": { "type": "int", "default_value": "None", - "description": "If not None:\nThe approximate number of singular vectors required to explain most\nof the input data by linear combinations. Using this kind of\nsingular spectrum in the input allows the generator to reproduce\nthe correlations often observed in practice.\nIf None:\nThe input set is well conditioned, centered and gaussian with\nunit variance." + "description": "If not None:\n The approximate number of singular vectors required to explain most\n of the input data by linear combinations. Using this kind of\n singular spectrum in the input allows the generator to reproduce\n the correlations often observed in practice.\nIf None:\n The input set is well conditioned, centered and gaussian with\n unit variance." }, "type": { "kind": "NamedType", @@ -61993,7 +61993,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.datasets"], - "description": "Generate a random regression problem with sparse uncorrelated design.\n\nThis dataset is described in Celeux et al [1]. as::\n\nX ~ N(0, 1)\ny(X) = X[:, 0] + 2 * X[:, 1] - 2 * X[:, 2] - 1.5 * X[:, 3]\n\nOnly the first 4 features are informative. 
The remaining features are\nuseless.\n\nRead more in the :ref:`User Guide `.", + "description": "Generate a random regression problem with sparse uncorrelated design.\n\nThis dataset is described in Celeux et al [1]. as::\n\n X ~ N(0, 1)\n y(X) = X[:, 0] + 2 * X[:, 1] - 2 * X[:, 2] - 1.5 * X[:, 3]\n\nOnly the first 4 features are informative. The remaining features are\nuseless.\n\nRead more in the :ref:`User Guide `.", "docstring": "Generate a random regression problem with sparse uncorrelated design.\n\nThis dataset is described in Celeux et al [1]. as::\n\n X ~ N(0, 1)\n y(X) = X[:, 0] + 2 * X[:, 1] - 2 * X[:, 2] - 1.5 * X[:, 3]\n\nOnly the first 4 features are informative. The remaining features are\nuseless.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int, default=100\n The number of samples.\n\nn_features : int, default=10\n The number of features.\n\nrandom_state : int, RandomState instance or None, default=None\n Determines random number generation for dataset creation. Pass an int\n for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The input samples.\n\ny : ndarray of shape (n_samples,)\n The output values.\n\nReferences\n----------\n.. [1] G. Celeux, M. El Anbari, J.-M. Marin, C. P. Robert,\n \"Regularization in regression: comparing Bayesian and frequentist\n methods in a poorly informative situation\", 2009." }, { @@ -62747,7 +62747,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "Samples may have several labels each (see\nhttps://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html).\n\n.. versionadded:: 0.17\nparameter *multilabel* to support multilabel datasets." + "description": "Samples may have several labels each (see\nhttps://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html).\n\n.. versionadded:: 0.17\n parameter *multilabel* to support multilabel datasets." }, "type": { "kind": "NamedType", @@ -62929,7 +62929,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.datasets"], - "description": "Load datasets in the svmlight / libsvm format into sparse CSR matrix\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nThis format is used as the default format for both svmlight and the\nlibsvm command line programs.\n\nParsing a text based source can be expensive. When repeatedly\nworking on the same dataset, it is recommended to wrap this\nloader with joblib.Memory.cache to store a memmapped backup of the\nCSR results of the first call and benefit from the near instantaneous\nloading of memmapped structures for the subsequent calls.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. 
These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nThis implementation is written in Cython and is reasonably fast.\nHowever, a faster API-compatible loader is also available at:\n\nhttps://github.com/mblondel/svmlight-loader", + "description": "Load datasets in the svmlight / libsvm format into sparse CSR matrix\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nThis format is used as the default format for both svmlight and the\nlibsvm command line programs.\n\nParsing a text based source can be expensive. When repeatedly\nworking on the same dataset, it is recommended to wrap this\nloader with joblib.Memory.cache to store a memmapped backup of the\nCSR results of the first call and benefit from the near instantaneous\nloading of memmapped structures for the subsequent calls.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nThis implementation is written in Cython and is reasonably fast.\nHowever, a faster API-compatible loader is also available at:\n\n https://github.com/mblondel/svmlight-loader", "docstring": "Load datasets in the svmlight / libsvm format into sparse CSR matrix\n\nThis format is a text-based format, with one sample per line. It does\nnot store zero valued features hence is suitable for sparse dataset.\n\nThe first element of each line can be used to store a target variable\nto predict.\n\nThis format is used as the default format for both svmlight and the\nlibsvm command line programs.\n\nParsing a text based source can be expensive. When repeatedly\nworking on the same dataset, it is recommended to wrap this\nloader with joblib.Memory.cache to store a memmapped backup of the\nCSR results of the first call and benefit from the near instantaneous\nloading of memmapped structures for the subsequent calls.\n\nIn case the file contains a pairwise preference constraint (known\nas \"qid\" in the svmlight format) these are ignored unless the\nquery_id parameter is set to True. These pairwise preference\nconstraints can be used to constraint the combination of samples\nwhen using pairwise loss functions (as is the case in some\nlearning to rank problems) so that only pairs with the same\nquery_id value are considered.\n\nThis implementation is written in Cython and is reasonably fast.\nHowever, a faster API-compatible loader is also available at:\n\n https://github.com/mblondel/svmlight-loader\n\nParameters\n----------\nf : str, file-like or int\n (Path to) a file to load. If a path ends in \".gz\" or \".bz2\", it will\n be uncompressed on the fly. If an integer is passed, it is assumed to\n be a file descriptor. A file-like or file descriptor will not be closed\n by this function. A file-like object must be opened in binary mode.\n\nn_features : int, default=None\n The number of features to use. If None, it will be inferred. 
This\n argument is useful to load several files that are subsets of a\n bigger sliced dataset: each subset might not have examples of\n every feature, hence the inferred shape might vary from one\n slice to another.\n n_features is only required if ``offset`` or ``length`` are passed a\n non-default value.\n\ndtype : numpy data type, default=np.float64\n Data type of dataset to be loaded. This will be the data type of the\n output numpy arrays ``X`` and ``y``.\n\nmultilabel : bool, default=False\n Samples may have several labels each (see\n https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multilabel.html)\n\nzero_based : bool or \"auto\", default=\"auto\"\n Whether column indices in f are zero-based (True) or one-based\n (False). If column indices are one-based, they are transformed to\n zero-based to match Python/NumPy conventions.\n If set to \"auto\", a heuristic check is applied to determine this from\n the file contents. Both kinds of files occur \"in the wild\", but they\n are unfortunately not self-identifying. Using \"auto\" or True should\n always be safe when no ``offset`` or ``length`` is passed.\n If ``offset`` or ``length`` are passed, the \"auto\" mode falls back\n to ``zero_based=True`` to avoid having the heuristic check yield\n inconsistent results on different segments of the file.\n\nquery_id : bool, default=False\n If True, will return the query_id array for each file.\n\noffset : int, default=0\n Ignore the offset first bytes by seeking forward, then\n discarding the following bytes up until the next new line\n character.\n\nlength : int, default=-1\n If strictly positive, stop reading any new line of data once the\n position in the file has reached the (offset + length) bytes threshold.\n\nReturns\n-------\nX : scipy.sparse matrix of shape (n_samples, n_features)\n\ny : ndarray of shape (n_samples,), or, in the multilabel a list of\n tuples of length n_samples.\n\nquery_id : array of shape (n_samples,)\n query_id for each sample. Only returned when query_id is set to\n True.\n\nSee Also\n--------\nload_svmlight_files : Similar function for loading multiple files in this\n format, enforcing the same number of features/columns on all of them.\n\nExamples\n--------\nTo use joblib.Memory to cache the svmlight file::\n\n from joblib import Memory\n from .datasets import load_svmlight_file\n mem = Memory(\"./mycache\")\n\n @mem.cache\n def get_data():\n data = load_svmlight_file(\"mysvmlightfile\")\n return data[0], data[1]\n\n X, y = get_data()" }, { @@ -63185,7 +63185,7 @@ }, "type": { "kind": "EnumType", - "values": ["test", "train", "all"] + "values": ["all", "train", "test"] } }, { @@ -63339,7 +63339,7 @@ }, "type": { "kind": "EnumType", - "values": ["test", "train", "all"] + "values": ["all", "train", "test"] } }, { @@ -63907,7 +63907,7 @@ "docstring": { "type": "{'lars', 'cd'}", "default_value": "'lars'", - "description": "* `'lars'`: uses the least angle regression method to solve the lasso\nproblem (:func:`~sklearn.linear_model.lars_path`);\n* `'cd'`: uses the coordinate descent method to compute the\nLasso solution (:class:`~sklearn.linear_model.Lasso`). Lars will be\nfaster if the estimated components are sparse.\n\n.. versionadded:: 0.17\n*cd* coordinate descent method to improve speed." + "description": "* `'lars'`: uses the least angle regression method to solve the lasso\n problem (:func:`~sklearn.linear_model.lars_path`);\n* `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (:class:`~sklearn.linear_model.Lasso`). 
Lars will be\n faster if the estimated components are sparse.\n\n.. versionadded:: 0.17\n *cd* coordinate descent method to improve speed." }, "type": { "kind": "EnumType", @@ -63924,11 +63924,11 @@ "docstring": { "type": "{'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}", "default_value": "'omp'", - "description": "Algorithm used to transform the data:\n\n- `'lars'`: uses the least angle regression method\n(:func:`~sklearn.linear_model.lars_path`);\n- `'lasso_lars'`: uses Lars to compute the Lasso solution.\n- `'lasso_cd'`: uses the coordinate descent method to compute the\nLasso solution (:class:`~sklearn.linear_model.Lasso`). `'lasso_lars'`\nwill be faster if the estimated components are sparse.\n- `'omp'`: uses orthogonal matching pursuit to estimate the sparse\nsolution.\n- `'threshold'`: squashes to zero all coefficients less than alpha from\nthe projection ``dictionary * X'``.\n\n.. versionadded:: 0.17\n*lasso_cd* coordinate descent method to improve speed." + "description": "Algorithm used to transform the data:\n\n- `'lars'`: uses the least angle regression method\n (:func:`~sklearn.linear_model.lars_path`);\n- `'lasso_lars'`: uses Lars to compute the Lasso solution.\n- `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (:class:`~sklearn.linear_model.Lasso`). `'lasso_lars'`\n will be faster if the estimated components are sparse.\n- `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution.\n- `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``.\n\n.. versionadded:: 0.17\n *lasso_cd* coordinate descent method to improve speed." }, "type": { "kind": "EnumType", - "values": ["lasso_lars", "omp", "threshold", "lars", "lasso_cd"] + "values": ["lasso_cd", "threshold", "omp", "lasso_lars", "lars"] } }, { @@ -64144,7 +64144,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n(U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n(U,V)\nwith || V_k ||_2 <= 1 for all 0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide `.", + "description": "Dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n (U,V)\n with || V_k ||_2 <= 1 for all 0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -64326,7 +64326,7 @@ "docstring": { "type": "int", "default_value": "1000", - "description": "Total number of iterations over data batches to perform.\n\n.. deprecated:: 1.1\n``n_iter`` is deprecated in 1.1 and will be removed in 1.3. Use\n``max_iter`` instead." + "description": "Total number of iterations over data batches to perform.\n\n.. deprecated:: 1.1\n ``n_iter`` is deprecated in 1.1 and will be removed in 1.3. Use\n ``max_iter`` instead." 
}, "type": { "kind": "NamedType", @@ -64360,7 +64360,7 @@ "docstring": { "type": "{'lars', 'cd'}", "default_value": "'lars'", - "description": "The algorithm used:\n\n- `'lars'`: uses the least angle regression method to solve the lasso\nproblem (`linear_model.lars_path`)\n- `'cd'`: uses the coordinate descent method to compute the\nLasso solution (`linear_model.Lasso`). Lars will be faster if\nthe estimated components are sparse." + "description": "The algorithm used:\n\n- `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`)\n- `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse." }, "type": { "kind": "EnumType", @@ -64445,11 +64445,11 @@ "docstring": { "type": "{'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}", "default_value": "'omp'", - "description": "Algorithm used to transform the data:\n\n- `'lars'`: uses the least angle regression method\n(`linear_model.lars_path`);\n- `'lasso_lars'`: uses Lars to compute the Lasso solution.\n- `'lasso_cd'`: uses the coordinate descent method to compute the\nLasso solution (`linear_model.Lasso`). `'lasso_lars'` will be faster\nif the estimated components are sparse.\n- `'omp'`: uses orthogonal matching pursuit to estimate the sparse\nsolution.\n- `'threshold'`: squashes to zero all coefficients less than alpha from\nthe projection ``dictionary * X'``." + "description": "Algorithm used to transform the data:\n\n- `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n- `'lasso_lars'`: uses Lars to compute the Lasso solution.\n- `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). `'lasso_lars'` will be faster\n if the estimated components are sparse.\n- `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution.\n- `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``." 
}, "type": { "kind": "EnumType", - "values": ["lasso_lars", "omp", "threshold", "lars", "lasso_cd"] + "values": ["lasso_cd", "threshold", "omp", "lasso_lars", "lars"] } }, { @@ -64665,7 +64665,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Mini-batch dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n(U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n(U,V)\nwith || V_k ||_2 <= 1 for all 0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide `.", + "description": "Mini-batch dictionary learning.\n\nFinds a dictionary (a set of atoms) that performs well at sparsely\nencoding the fitted data.\n\nSolves the optimization problem::\n\n (U^*,V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n (U,V)\n with || V_k ||_2 <= 1 for all 0 <= k < n_components\n\n||.||_Fro stands for the Frobenius norm and ||.||_1,1 stands for\nthe entry-wise matrix norm which is the sum of the absolute values\nof all the entries in the matrix.\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -64790,7 +64790,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Helper function to encapsulate the early stopping logic.\n\nEarly stopping is based on two factors:\n- A small change of the dictionary between two minibatch updates. This is\ncontrolled by the tol parameter.\n- No more improvement on a smoothed estimate of the objective function for a\na certain number of consecutive minibatch updates. This is controlled by\nthe max_no_improvement parameter.", + "description": "Helper function to encapsulate the early stopping logic.\n\nEarly stopping is based on two factors:\n- A small change of the dictionary between two minibatch updates. This is\n controlled by the tol parameter.\n- No more improvement on a smoothed estimate of the objective function for a\n a certain number of consecutive minibatch updates. This is controlled by\n the max_no_improvement parameter.", "docstring": "Helper function to encapsulate the early stopping logic.\n\nEarly stopping is based on two factors:\n- A small change of the dictionary between two minibatch updates. This is\n controlled by the tol parameter.\n- No more improvement on a smoothed estimate of the objective function for a\n a certain number of consecutive minibatch updates. This is controlled by\n the max_no_improvement parameter." }, { @@ -65294,7 +65294,7 @@ "docstring": { "type": "int", "default_value": "None", - "description": "The number of iteration on data batches that has been\nperformed before this call to `partial_fit`. This is optional:\nif no number is passed, the memory of the object is\nused.\n\n.. deprecated:: 1.1\n``iter_offset`` will be removed in 1.3." + "description": "The number of iteration on data batches that has been\nperformed before this call to `partial_fit`. This is optional:\nif no number is passed, the memory of the object is\nused.\n\n.. deprecated:: 1.1\n ``iter_offset`` will be removed in 1.3." 
}, "type": { "kind": "NamedType", @@ -65385,11 +65385,11 @@ "docstring": { "type": "{'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}", "default_value": "'omp'", - "description": "Algorithm used to transform the data:\n\n- `'lars'`: uses the least angle regression method\n(`linear_model.lars_path`);\n- `'lasso_lars'`: uses Lars to compute the Lasso solution;\n- `'lasso_cd'`: uses the coordinate descent method to compute the\nLasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if\nthe estimated components are sparse;\n- `'omp'`: uses orthogonal matching pursuit to estimate the sparse\nsolution;\n- `'threshold'`: squashes to zero all coefficients less than alpha from\nthe projection ``dictionary * X'``." + "description": "Algorithm used to transform the data:\n\n- `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n- `'lasso_lars'`: uses Lars to compute the Lasso solution;\n- `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (linear_model.Lasso). `'lasso_lars'` will be faster if\n the estimated components are sparse;\n- `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n- `'threshold'`: squashes to zero all coefficients less than alpha from\n the projection ``dictionary * X'``." }, "type": { "kind": "EnumType", - "values": ["lasso_lars", "omp", "threshold", "lars", "lasso_cd"] + "values": ["lasso_cd", "threshold", "omp", "lasso_lars", "lars"] } }, { @@ -65498,7 +65498,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Sparse coding.\n\nFinds a sparse representation of data against a fixed, precomputed\ndictionary.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\nX ~= code * dictionary\n\nRead more in the :ref:`User Guide `.", + "description": "Sparse coding.\n\nFinds a sparse representation of data against a fixed, precomputed\ndictionary.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -66158,11 +66158,11 @@ "docstring": { "type": "{'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}", "default_value": "'lasso_lars'", - "description": "The algorithm used:\n\n* `'lars'`: uses the least angle regression method\n(`linear_model.lars_path`);\n* `'lasso_lars'`: uses Lars to compute the Lasso solution;\n* `'lasso_cd'`: uses the coordinate descent method to compute the\nLasso solution (`linear_model.Lasso`). lasso_lars will be faster if\nthe estimated components are sparse;\n* `'omp'`: uses orthogonal matching pursuit to estimate the sparse\nsolution;\n* `'threshold'`: squashes to zero all coefficients less than\nregularization from the projection `dictionary * data'`." + "description": "The algorithm used:\n\n* `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n* `'lasso_lars'`: uses Lars to compute the Lasso solution;\n* `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if\n the estimated components are sparse;\n* `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n* `'threshold'`: squashes to zero all coefficients less than\n regularization from the projection `dictionary * data'`." 
}, "type": { "kind": "EnumType", - "values": ["lasso_lars", "omp", "threshold", "lars", "lasso_cd"] + "values": ["lasso_cd", "threshold", "omp", "lasso_lars", "lars"] } }, { @@ -66557,7 +66557,7 @@ "docstring": { "type": "{'lars', 'cd'}", "default_value": "'lars'", - "description": "The method used:\n\n* `'lars'`: uses the least angle regression method to solve the lasso\nproblem (`linear_model.lars_path`);\n* `'cd'`: uses the coordinate descent method to compute the\nLasso solution (`linear_model.Lasso`). Lars will be faster if\nthe estimated components are sparse." + "description": "The method used:\n\n* `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`);\n* `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse." }, "type": { "kind": "EnumType", @@ -66751,7 +66751,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.decomposition"], - "description": "Solves a dictionary learning matrix factorization problem.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n(U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n(U,V)\nwith || V_k ||_2 = 1 for all 0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. ||.||_Fro stands for\nthe Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\nwhich is the sum of the absolute values of all the entries in the matrix.\n\nRead more in the :ref:`User Guide `.", + "description": "Solves a dictionary learning matrix factorization problem.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. ||.||_Fro stands for\nthe Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\nwhich is the sum of the absolute values of all the entries in the matrix.\n\nRead more in the :ref:`User Guide `.", "docstring": "Solves a dictionary learning matrix factorization problem.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. ||.||_Fro stands for\nthe Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\nwhich is the sum of the absolute values of all the entries in the matrix.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\nn_components : int\n Number of dictionary atoms to extract.\n\nalpha : int\n Sparsity controlling parameter.\n\nmax_iter : int, default=100\n Maximum number of iterations to perform.\n\ntol : float, default=1e-8\n Tolerance for the stopping condition.\n\nmethod : {'lars', 'cd'}, default='lars'\n The method used:\n\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`);\n * `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). 
Lars will be faster if\n the estimated components are sparse.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n Initial value for the dictionary for warm restart scenarios. Only used\n if `code_init` and `dict_init` are not None.\n\ncode_init : ndarray of shape (n_samples, n_components), default=None\n Initial value for the sparse code for warm restart scenarios. Only used\n if `code_init` and `dict_init` are not None.\n\ncallback : callable, default=None\n Callable that gets invoked every five iterations\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for randomly initializing the dictionary. Pass an int for\n reproducible results across multiple function calls.\n See :term:`Glossary `.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\nmethod_max_iter : int, default=1000\n Maximum number of iterations to perform.\n\n .. versionadded:: 0.22\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components)\n The sparse code factor in the matrix factorization.\n\ndictionary : ndarray of shape (n_components, n_features),\n The dictionary factor in the matrix factorization.\n\nerrors : array\n Vector of errors at each iteration.\n\nn_iter : int\n Number of iterations run. Returned only if `return_n_iter` is\n set to True.\n\nSee Also\n--------\ndict_learning_online\nDictionaryLearning\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA" }, { @@ -66830,7 +66830,7 @@ "docstring": { "type": "int", "default_value": "100", - "description": "Number of mini-batch iterations to perform.\n\n.. deprecated:: 1.1\n`n_iter` is deprecated in 1.1 and will be removed in 1.3. Use\n`max_iter` instead." + "description": "Number of mini-batch iterations to perform.\n\n.. deprecated:: 1.1\n `n_iter` is deprecated in 1.1 and will be removed in 1.3. Use\n `max_iter` instead." }, "type": { "kind": "NamedType", @@ -66983,7 +66983,7 @@ "docstring": { "type": "{'lars', 'cd'}", "default_value": "'lars'", - "description": "* `'lars'`: uses the least angle regression method to solve the lasso\nproblem (`linear_model.lars_path`);\n* `'cd'`: uses the coordinate descent method to compute the\nLasso solution (`linear_model.Lasso`). Lars will be faster if\nthe estimated components are sparse." + "description": "* `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`);\n* `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse." }, "type": { "kind": "EnumType", @@ -67000,7 +67000,7 @@ "docstring": { "type": "int", "default_value": "0", - "description": "Number of previous iterations completed on the dictionary used for\ninitialization.\n\n.. deprecated:: 1.1\n`iter_offset` serves internal purpose only and will be removed in 1.3." + "description": "Number of previous iterations completed on the dictionary used for\ninitialization.\n\n.. 
deprecated:: 1.1\n `iter_offset` serves internal purpose only and will be removed in 1.3." }, "type": { "kind": "NamedType", @@ -67047,7 +67047,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "Return the inner statistics A (dictionary covariance) and B\n(data approximation). Useful to restart the algorithm in an\nonline setting. If `return_inner_stats` is `True`, `return_code` is\nignored.\n\n.. deprecated:: 1.1\n`return_inner_stats` serves internal purpose only and will be removed in 1.3." + "description": "Return the inner statistics A (dictionary covariance) and B\n(data approximation). Useful to restart the algorithm in an\nonline setting. If `return_inner_stats` is `True`, `return_code` is\nignored.\n\n.. deprecated:: 1.1\n `return_inner_stats` serves internal purpose only and will be removed in 1.3." }, "type": { "kind": "NamedType", @@ -67064,7 +67064,7 @@ "docstring": { "type": "tuple of (A, B) ndarrays", "default_value": "None", - "description": "Inner sufficient statistics that are kept by the algorithm.\nPassing them at initialization is useful in online settings, to\navoid losing the history of the evolution.\n`A` `(n_components, n_components)` is the dictionary covariance matrix.\n`B` `(n_features, n_components)` is the data approximation matrix.\n\n.. deprecated:: 1.1\n`inner_stats` serves internal purpose only and will be removed in 1.3." + "description": "Inner sufficient statistics that are kept by the algorithm.\nPassing them at initialization is useful in online settings, to\navoid losing the history of the evolution.\n`A` `(n_components, n_components)` is the dictionary covariance matrix.\n`B` `(n_features, n_components)` is the data approximation matrix.\n\n.. deprecated:: 1.1\n `inner_stats` serves internal purpose only and will be removed in 1.3." }, "type": { "kind": "NamedType", @@ -67081,7 +67081,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "Whether or not to return the number of iterations.\n\n.. deprecated:: 1.1\n`return_n_iter` will be removed in 1.3 and n_iter will always be returned." + "description": "Whether or not to return the number of iterations.\n\n.. deprecated:: 1.1\n `return_n_iter` will be removed in 1.3 and n_iter will always be returned." }, "type": { "kind": "NamedType", @@ -67177,7 +67177,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.decomposition"], - "description": "Solves a dictionary learning matrix factorization problem online.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n(U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n(U,V)\nwith || V_k ||_2 = 1 for all 0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. ||.||_Fro stands for\nthe Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\nwhich is the sum of the absolute values of all the entries in the matrix.\nThis is accomplished by repeatedly iterating over mini-batches by slicing\nthe input data.\n\nRead more in the :ref:`User Guide `.", + "description": "Solves a dictionary learning matrix factorization problem online.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. 
||.||_Fro stands for\nthe Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\nwhich is the sum of the absolute values of all the entries in the matrix.\nThis is accomplished by repeatedly iterating over mini-batches by slicing\nthe input data.\n\nRead more in the :ref:`User Guide `.", "docstring": "Solves a dictionary learning matrix factorization problem online.\n\nFinds the best dictionary and the corresponding sparse code for\napproximating the data matrix X by solving::\n\n (U^*, V^*) = argmin 0.5 || X - U V ||_Fro^2 + alpha * || U ||_1,1\n (U,V)\n with || V_k ||_2 = 1 for all 0 <= k < n_components\n\nwhere V is the dictionary and U is the sparse code. ||.||_Fro stands for\nthe Frobenius norm and ||.||_1,1 stands for the entry-wise matrix norm\nwhich is the sum of the absolute values of all the entries in the matrix.\nThis is accomplished by repeatedly iterating over mini-batches by slicing\nthe input data.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\nn_components : int or None, default=2\n Number of dictionary atoms to extract. If None, then ``n_components``\n is set to ``n_features``.\n\nalpha : float, default=1\n Sparsity controlling parameter.\n\nn_iter : int, default=100\n Number of mini-batch iterations to perform.\n\n .. deprecated:: 1.1\n `n_iter` is deprecated in 1.1 and will be removed in 1.3. Use\n `max_iter` instead.\n\nmax_iter : int, default=None\n Maximum number of iterations over the complete dataset before\n stopping independently of any early stopping criterion heuristics.\n If ``max_iter`` is not None, ``n_iter`` is ignored.\n\n .. versionadded:: 1.1\n\nreturn_code : bool, default=True\n Whether to also return the code U or just the dictionary `V`.\n\ndict_init : ndarray of shape (n_components, n_features), default=None\n Initial values for the dictionary for warm restart scenarios.\n If `None`, the initial values for the dictionary are created\n with an SVD decomposition of the data via :func:`~sklearn.utils.randomized_svd`.\n\ncallback : callable, default=None\n A callable that gets invoked at the end of each iteration.\n\nbatch_size : int, default=3\n The number of samples to take in each batch.\n\nverbose : bool, default=False\n To control the verbosity of the procedure.\n\nshuffle : bool, default=True\n Whether to shuffle the data before splitting it in batches.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nmethod : {'lars', 'cd'}, default='lars'\n * `'lars'`: uses the least angle regression method to solve the lasso\n problem (`linear_model.lars_path`);\n * `'cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). Lars will be faster if\n the estimated components are sparse.\n\niter_offset : int, default=0\n Number of previous iterations completed on the dictionary used for\n initialization.\n\n .. deprecated:: 1.1\n `iter_offset` serves internal purpose only and will be removed in 1.3.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for initializing the dictionary when ``dict_init`` is not\n specified, randomly shuffling the data when ``shuffle`` is set to\n ``True``, and updating the dictionary. 
Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nreturn_inner_stats : bool, default=False\n Return the inner statistics A (dictionary covariance) and B\n (data approximation). Useful to restart the algorithm in an\n online setting. If `return_inner_stats` is `True`, `return_code` is\n ignored.\n\n .. deprecated:: 1.1\n `return_inner_stats` serves internal purpose only and will be removed in 1.3.\n\ninner_stats : tuple of (A, B) ndarrays, default=None\n Inner sufficient statistics that are kept by the algorithm.\n Passing them at initialization is useful in online settings, to\n avoid losing the history of the evolution.\n `A` `(n_components, n_components)` is the dictionary covariance matrix.\n `B` `(n_features, n_components)` is the data approximation matrix.\n\n .. deprecated:: 1.1\n `inner_stats` serves internal purpose only and will be removed in 1.3.\n\nreturn_n_iter : bool, default=False\n Whether or not to return the number of iterations.\n\n .. deprecated:: 1.1\n `return_n_iter` will be removed in 1.3 and n_iter will always be returned.\n\npositive_dict : bool, default=False\n Whether to enforce positivity when finding the dictionary.\n\n .. versionadded:: 0.20\n\npositive_code : bool, default=False\n Whether to enforce positivity when finding the code.\n\n .. versionadded:: 0.20\n\nmethod_max_iter : int, default=1000\n Maximum number of iterations to perform when solving the lasso problem.\n\n .. versionadded:: 0.22\n\ntol : float, default=1e-3\n Control early stopping based on the norm of the differences in the\n dictionary between 2 steps. Used only if `max_iter` is not None.\n\n To disable early stopping based on changes in the dictionary, set\n `tol` to 0.0.\n\n .. versionadded:: 1.1\n\nmax_no_improvement : int, default=10\n Control early stopping based on the consecutive number of mini batches\n that does not yield an improvement on the smoothed cost function. Used only if\n `max_iter` is not None.\n\n To disable convergence detection based on cost function, set\n `max_no_improvement` to None.\n\n .. versionadded:: 1.1\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components),\n The sparse code (only returned if `return_code=True`).\n\ndictionary : ndarray of shape (n_components, n_features),\n The solutions to the dictionary learning problem.\n\nn_iter : int\n Number of iterations run. Returned only if `return_n_iter` is\n set to `True`.\n\nSee Also\n--------\ndict_learning\nDictionaryLearning\nMiniBatchDictionaryLearning\nSparsePCA\nMiniBatchSparsePCA" }, { @@ -67264,11 +67264,11 @@ "docstring": { "type": "{'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}", "default_value": "'lasso_lars'", - "description": "The algorithm used:\n\n* `'lars'`: uses the least angle regression method\n(`linear_model.lars_path`);\n* `'lasso_lars'`: uses Lars to compute the Lasso solution;\n* `'lasso_cd'`: uses the coordinate descent method to compute the\nLasso solution (`linear_model.Lasso`). lasso_lars will be faster if\nthe estimated components are sparse;\n* `'omp'`: uses orthogonal matching pursuit to estimate the sparse\nsolution;\n* `'threshold'`: squashes to zero all coefficients less than\nregularization from the projection `dictionary * data'`." 
+ "description": "The algorithm used:\n\n* `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n* `'lasso_lars'`: uses Lars to compute the Lasso solution;\n* `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if\n the estimated components are sparse;\n* `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n* `'threshold'`: squashes to zero all coefficients less than\n regularization from the projection `dictionary * data'`." }, "type": { "kind": "EnumType", - "values": ["lasso_lars", "omp", "threshold", "lars", "lasso_cd"] + "values": ["lasso_cd", "threshold", "omp", "lasso_lars", "lars"] } }, { @@ -67428,7 +67428,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.decomposition"], - "description": "Sparse coding.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\nX ~= code * dictionary\n\nRead more in the :ref:`User Guide `.", + "description": "Sparse coding.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\nRead more in the :ref:`User Guide `.", "docstring": "Sparse coding.\n\nEach row of the result is the solution to a sparse coding problem.\nThe goal is to find a sparse array `code` such that::\n\n X ~= code * dictionary\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features)\n Data matrix.\n\ndictionary : ndarray of shape (n_components, n_features)\n The dictionary matrix against which to solve the sparse coding of\n the data. Some of the algorithms assume normalized rows for meaningful\n output.\n\ngram : ndarray of shape (n_components, n_components), default=None\n Precomputed Gram matrix, `dictionary * dictionary'`.\n\ncov : ndarray of shape (n_components, n_samples), default=None\n Precomputed covariance, `dictionary' * X`.\n\nalgorithm : {'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}, default='lasso_lars'\n The algorithm used:\n\n * `'lars'`: uses the least angle regression method\n (`linear_model.lars_path`);\n * `'lasso_lars'`: uses Lars to compute the Lasso solution;\n * `'lasso_cd'`: uses the coordinate descent method to compute the\n Lasso solution (`linear_model.Lasso`). lasso_lars will be faster if\n the estimated components are sparse;\n * `'omp'`: uses orthogonal matching pursuit to estimate the sparse\n solution;\n * `'threshold'`: squashes to zero all coefficients less than\n regularization from the projection `dictionary * data'`.\n\nn_nonzero_coefs : int, default=None\n Number of nonzero coefficients to target in each column of the\n solution. This is only used by `algorithm='lars'` and `algorithm='omp'`\n and is overridden by `alpha` in the `omp` case. If `None`, then\n `n_nonzero_coefs=int(n_features / 10)`.\n\nalpha : float, default=None\n If `algorithm='lasso_lars'` or `algorithm='lasso_cd'`, `alpha` is the\n penalty applied to the L1 norm.\n If `algorithm='threshold'`, `alpha` is the absolute value of the\n threshold below which coefficients will be squashed to zero.\n If `algorithm='omp'`, `alpha` is the tolerance parameter: the value of\n the reconstruction error targeted. 
In this case, it overrides\n `n_nonzero_coefs`.\n If `None`, default to 1.\n\ncopy_cov : bool, default=True\n Whether to copy the precomputed covariance matrix; if `False`, it may\n be overwritten.\n\ninit : ndarray of shape (n_samples, n_components), default=None\n Initialization value of the sparse codes. Only used if\n `algorithm='lasso_cd'`.\n\nmax_iter : int, default=1000\n Maximum number of iterations to perform if `algorithm='lasso_cd'` or\n `'lasso_lars'`.\n\nn_jobs : int, default=None\n Number of parallel jobs to run.\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\ncheck_input : bool, default=True\n If `False`, the input arrays X and dictionary will not be checked.\n\nverbose : int, default=0\n Controls the verbosity; the higher, the more messages.\n\npositive : bool, default=False\n Whether to enforce positivity when finding the encoding.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ncode : ndarray of shape (n_samples, n_components)\n The sparse codes.\n\nSee Also\n--------\nsklearn.linear_model.lars_path : Compute Least Angle Regression or Lasso\n path using LARS algorithm.\nsklearn.linear_model.orthogonal_mp : Solves Orthogonal Matching Pursuit problems.\nsklearn.linear_model.Lasso : Train Linear Model with L1 prior as regularizer.\nSparseCoder : Find a sparse representation of data from a fixed precomputed\n dictionary." }, { @@ -68113,7 +68113,7 @@ "docstring": { "type": "str or bool", "default_value": "\"warn\"", - "description": "Specify the whitening strategy to use.\nIf 'arbitrary-variance' (default), a whitening with variance arbitrary is used.\nIf 'unit-variance', the whitening matrix is rescaled to ensure that each\nrecovered source has unit variance.\nIf False, the data is already considered to be whitened, and no\nwhitening is performed.\n\n.. deprecated:: 1.1\nFrom version 1.3 whiten='unit-variance' will be used by default.\n`whiten=True` is deprecated from 1.1 and will raise ValueError in 1.3.\nUse `whiten=arbitrary-variance` instead." + "description": "Specify the whitening strategy to use.\nIf 'arbitrary-variance' (default), a whitening with variance arbitrary is used.\nIf 'unit-variance', the whitening matrix is rescaled to ensure that each\nrecovered source has unit variance.\nIf False, the data is already considered to be whitened, and no\nwhitening is performed.\n\n.. deprecated:: 1.1\n From version 1.3 whiten='unit-variance' will be used by default.\n `whiten=True` is deprecated from 1.1 and will raise ValueError in 1.3.\n Use `whiten=arbitrary-variance` instead." }, "type": { "kind": "UnionType", @@ -68139,14 +68139,14 @@ "docstring": { "type": "{'logcosh', 'exp', 'cube'} or callable", "default_value": "'logcosh'", - "description": "The functional form of the G function used in the\napproximation to neg-entropy. Could be either 'logcosh', 'exp',\nor 'cube'.\nYou can also provide your own function. It should return a tuple\ncontaining the value of the function, and of its derivative, in the\npoint. Example::\n\ndef my_g(x):\nreturn x ** 3, (3 * x ** 2).mean(axis=-1)" + "description": "The functional form of the G function used in the\napproximation to neg-entropy. Could be either 'logcosh', 'exp',\nor 'cube'.\nYou can also provide your own function. It should return a tuple\ncontaining the value of the function, and of its derivative, in the\npoint. 
Example::\n\n def my_g(x):\n return x ** 3, (3 * x ** 2).mean(axis=-1)" }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["cube", "exp", "logcosh"] + "values": ["logcosh", "cube", "exp"] }, { "kind": "NamedType", @@ -69094,7 +69094,7 @@ "docstring": { "type": "str or bool", "default_value": "\"warn\"", - "description": "Specify the whitening strategy to use.\nIf 'arbitrary-variance' (default), a whitening with variance arbitrary is used.\nIf 'unit-variance', the whitening matrix is rescaled to ensure that each\nrecovered source has unit variance.\nIf False, the data is already considered to be whitened, and no\nwhitening is performed.\n\n.. deprecated:: 1.1\nFrom version 1.3, `whiten='unit-variance'` will be used by default.\n`whiten=True` is deprecated from 1.1 and will raise ValueError in 1.3.\nUse `whiten=arbitrary-variance` instead." + "description": "Specify the whitening strategy to use.\nIf 'arbitrary-variance' (default), a whitening with variance arbitrary is used.\nIf 'unit-variance', the whitening matrix is rescaled to ensure that each\nrecovered source has unit variance.\nIf False, the data is already considered to be whitened, and no\nwhitening is performed.\n\n.. deprecated:: 1.1\n From version 1.3, `whiten='unit-variance'` will be used by default.\n `whiten=True` is deprecated from 1.1 and will raise ValueError in 1.3.\n Use `whiten=arbitrary-variance` instead." }, "type": { "kind": "UnionType", @@ -69120,14 +69120,14 @@ "docstring": { "type": "{'logcosh', 'exp', 'cube'} or callable", "default_value": "'logcosh'", - "description": "The functional form of the G function used in the\napproximation to neg-entropy. Could be either 'logcosh', 'exp',\nor 'cube'.\nYou can also provide your own function. It should return a tuple\ncontaining the value of the function, and of its derivative, in the\npoint. The derivative should be averaged along its last dimension.\nExample:\n\ndef my_g(x):\nreturn x ** 3, np.mean(3 * x ** 2, axis=-1)" + "description": "The functional form of the G function used in the\napproximation to neg-entropy. Could be either 'logcosh', 'exp',\nor 'cube'.\nYou can also provide your own function. It should return a tuple\ncontaining the value of the function, and of its derivative, in the\npoint. The derivative should be averaged along its last dimension.\nExample:\n\ndef my_g(x):\n return x ** 3, np.mean(3 * x ** 2, axis=-1)" }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["cube", "exp", "logcosh"] + "values": ["logcosh", "cube", "exp"] }, { "kind": "NamedType", @@ -69639,7 +69639,7 @@ }, "type": { "kind": "EnumType", - "values": ["cosine", "poly", "linear", "precomputed", "sigmoid", "rbf"] + "values": ["rbf", "linear", "poly", "precomputed", "sigmoid", "cosine"] } }, { @@ -69754,11 +69754,11 @@ "docstring": { "type": "{'auto', 'dense', 'arpack', 'randomized'}", "default_value": "'auto'", - "description": "Select eigensolver to use. If `n_components` is much\nless than the number of training samples, randomized (or arpack to a\nsmaller extent) may be more efficient than the dense eigensolver.\nRandomized SVD is performed according to the method of Halko et al\n[3]_.\n\nauto :\nthe solver is selected by a default policy based on n_samples\n(the number of training samples) and `n_components`:\nif the number of components to extract is less than 10 (strict) and\nthe number of samples is more than 200 (strict), the 'arpack'\nmethod is enabled. 
Otherwise the exact full eigenvalue\ndecomposition is computed and optionally truncated afterwards\n('dense' method).\ndense :\nrun exact full eigenvalue decomposition calling the standard\nLAPACK solver via `scipy.linalg.eigh`, and select the components\nby postprocessing\narpack :\nrun SVD truncated to n_components calling ARPACK solver using\n`scipy.sparse.linalg.eigsh`. It requires strictly\n0 < n_components < n_samples\nrandomized :\nrun randomized SVD by the method of Halko et al. [3]_. The current\nimplementation selects eigenvalues based on their module; therefore\nusing this method can lead to unexpected results if the kernel is\nnot positive semi-definite. See also [4]_.\n\n.. versionchanged:: 1.0\n`'randomized'` was added." + "description": "Select eigensolver to use. If `n_components` is much\nless than the number of training samples, randomized (or arpack to a\nsmaller extent) may be more efficient than the dense eigensolver.\nRandomized SVD is performed according to the method of Halko et al\n[3]_.\n\nauto :\n the solver is selected by a default policy based on n_samples\n (the number of training samples) and `n_components`:\n if the number of components to extract is less than 10 (strict) and\n the number of samples is more than 200 (strict), the 'arpack'\n method is enabled. Otherwise the exact full eigenvalue\n decomposition is computed and optionally truncated afterwards\n ('dense' method).\ndense :\n run exact full eigenvalue decomposition calling the standard\n LAPACK solver via `scipy.linalg.eigh`, and select the components\n by postprocessing\narpack :\n run SVD truncated to n_components calling ARPACK solver using\n `scipy.sparse.linalg.eigsh`. It requires strictly\n 0 < n_components < n_samples\nrandomized :\n run randomized SVD by the method of Halko et al. [3]_. The current\n implementation selects eigenvalues based on their module; therefore\n using this method can lead to unexpected results if the kernel is\n not positive semi-definite. See also [4]_.\n\n.. versionchanged:: 1.0\n `'randomized'` was added." }, "type": { "kind": "EnumType", - "values": ["auto", "dense", "randomized", "arpack"] + "values": ["randomized", "dense", "arpack", "auto"] } }, { @@ -70351,7 +70351,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Transform X back to original space.\n\n``inverse_transform`` approximates the inverse transformation using\na learned pre-image. The pre-image is learned by kernel ridge\nregression of the original data on their low-dimensional representation\nvectors.\n\n.. note:\n:meth:`~sklearn.decomposition.fit` internally uses a centered\nkernel. As the centered kernel no longer contains the information\nof the mean of kernel features, such information is not taken into\naccount in reconstruction.\n\n.. note::\nWhen users want to compute inverse transformation for 'linear'\nkernel, it is recommended that they use\n:class:`~sklearn.decomposition.PCA` instead. Unlike\n:class:`~sklearn.decomposition.PCA`,\n:class:`~sklearn.decomposition.KernelPCA`'s ``inverse_transform``\ndoes not reconstruct the mean of data when 'linear' kernel is used\ndue to the use of centered kernel.", + "description": "Transform X back to original space.\n\n``inverse_transform`` approximates the inverse transformation using\na learned pre-image. The pre-image is learned by kernel ridge\nregression of the original data on their low-dimensional representation\nvectors.\n\n.. note:\n :meth:`~sklearn.decomposition.fit` internally uses a centered\n kernel. 
As the centered kernel no longer contains the information\n of the mean of kernel features, such information is not taken into\n account in reconstruction.\n\n.. note::\n When users want to compute inverse transformation for 'linear'\n kernel, it is recommended that they use\n :class:`~sklearn.decomposition.PCA` instead. Unlike\n :class:`~sklearn.decomposition.PCA`,\n :class:`~sklearn.decomposition.KernelPCA`'s ``inverse_transform``\n does not reconstruct the mean of data when 'linear' kernel is used\n due to the use of centered kernel.", "docstring": "Transform X back to original space.\n\n``inverse_transform`` approximates the inverse transformation using\na learned pre-image. The pre-image is learned by kernel ridge\nregression of the original data on their low-dimensional representation\nvectors.\n\n.. note:\n :meth:`~sklearn.decomposition.fit` internally uses a centered\n kernel. As the centered kernel no longer contains the information\n of the mean of kernel features, such information is not taken into\n account in reconstruction.\n\n.. note::\n When users want to compute inverse transformation for 'linear'\n kernel, it is recommended that they use\n :class:`~sklearn.decomposition.PCA` instead. Unlike\n :class:`~sklearn.decomposition.PCA`,\n :class:`~sklearn.decomposition.KernelPCA`'s ``inverse_transform``\n does not reconstruct the mean of data when 'linear' kernel is used\n due to the use of centered kernel.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_components)\n Training vector, where `n_samples` is the number of samples\n and `n_features` is the number of features.\n\nReturns\n-------\nX_new : ndarray of shape (n_samples, n_features)\n Returns the instance itself.\n\nReferences\n----------\n`Bak\u0131r, G\u00f6khan H., Jason Weston, and Bernhard Sch\u00f6lkopf.\n\"Learning to find pre-images.\"\nAdvances in neural information processing systems 16 (2004): 449-456.\n`_" }, { @@ -70467,7 +70467,7 @@ "docstring": { "type": "int", "default_value": "10", - "description": "Number of topics.\n\n.. versionchanged:: 0.19\n``n_topics`` was renamed to ``n_components``" + "description": "Number of topics.\n\n.. versionchanged:: 0.19\n ``n_topics`` was renamed to ``n_components``" }, "type": { "kind": "NamedType", @@ -70518,11 +70518,11 @@ "docstring": { "type": "{'batch', 'online'}", "default_value": "'batch'", - "description": "Method used to update `_component`. Only used in :meth:`fit` method.\nIn general, if the data size is large, the online update will be much\nfaster than the batch update.\n\nValid options::\n\n'batch': Batch variational Bayes method. Use all training data in\neach EM update.\nOld `components_` will be overwritten in each iteration.\n'online': Online variational Bayes method. In each EM update, use\nmini-batch of training data to update the ``components_``\nvariable incrementally. The learning rate is controlled by the\n``learning_decay`` and the ``learning_offset`` parameters.\n\n.. versionchanged:: 0.20\nThe default learning method is now ``\"batch\"``." + "description": "Method used to update `_component`. Only used in :meth:`fit` method.\nIn general, if the data size is large, the online update will be much\nfaster than the batch update.\n\nValid options::\n\n 'batch': Batch variational Bayes method. Use all training data in\n each EM update.\n Old `components_` will be overwritten in each iteration.\n 'online': Online variational Bayes method. 
In each EM update, use\n mini-batch of training data to update the ``components_``\n variable incrementally. The learning rate is controlled by the\n ``learning_decay`` and the ``learning_offset`` parameters.\n\n.. versionchanged:: 0.20\n The default learning method is now ``\"batch\"``." }, "type": { "kind": "EnumType", - "values": ["batch", "online"] + "values": ["online", "batch"] } }, { @@ -71591,7 +71591,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Calculate approximate perplexity for data X.\n\nPerplexity is defined as exp(-1. * log-likelihood per word)\n\n.. versionchanged:: 0.19\n*doc_topic_distr* argument has been deprecated and is ignored\nbecause user no longer has access to unnormalized distribution", + "description": "Calculate approximate perplexity for data X.\n\nPerplexity is defined as exp(-1. * log-likelihood per word)\n\n.. versionchanged:: 0.19\n *doc_topic_distr* argument has been deprecated and is ignored\n because user no longer has access to unnormalized distribution", "docstring": "Calculate approximate perplexity for data X.\n\nPerplexity is defined as exp(-1. * log-likelihood per word)\n\n.. versionchanged:: 0.19\n *doc_topic_distr* argument has been deprecated and is ignored\n because user no longer has access to unnormalized distribution\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\nsub_sampling : bool\n Do sub-sampling or not.\n\nReturns\n-------\nscore : float\n Perplexity score." }, { @@ -71714,7 +71714,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Transform data X according to the fitted model.\n\n.. versionchanged:: 0.18\n*doc_topic_distr* is now normalized", + "description": "Transform data X according to the fitted model.\n\n .. versionchanged:: 0.18\n *doc_topic_distr* is now normalized", "docstring": "Transform data X according to the fitted model.\n\n .. versionchanged:: 0.18\n *doc_topic_distr* is now normalized\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Document word matrix.\n\nReturns\n-------\ndoc_topic_distr : ndarray of shape (n_samples, n_components)\n Document topic distribution for X." 
}, { @@ -71914,11 +71914,11 @@ "docstring": { "type": "{'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}", "default_value": "None", - "description": "Method used to initialize the procedure.\nValid options:\n\n- `None`: 'nndsvda' if `n_components <= min(n_samples, n_features)`,\notherwise random.\n\n- `'random'`: non-negative random matrices, scaled with:\n`sqrt(X.mean() / n_components)`\n\n- `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\ninitialization (better for sparseness).\n\n- `'nndsvda'`: NNDSVD with zeros filled with the average of X\n(better when sparsity is not desired).\n\n- `'nndsvdar'` NNDSVD with zeros filled with small random values\n(generally faster, less accurate alternative to NNDSVDa\nfor when sparsity is not desired).\n\n- `'custom'`: use custom matrices `W` and `H`" + "description": "Method used to initialize the procedure.\nValid options:\n\n- `None`: 'nndsvda' if `n_components <= min(n_samples, n_features)`,\n otherwise random.\n\n- `'random'`: non-negative random matrices, scaled with:\n `sqrt(X.mean() / n_components)`\n\n- `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness).\n\n- `'nndsvda'`: NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired).\n\n- `'nndsvdar'` NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired).\n\n- `'custom'`: use custom matrices `W` and `H`" }, "type": { "kind": "EnumType", - "values": ["nndsvdar", "nndsvda", "nndsvd", "custom", "random"] + "values": ["nndsvd", "custom", "nndsvdar", "nndsvda", "random"] } }, { @@ -71955,7 +71955,7 @@ "types": [ { "kind": "EnumType", - "values": ["frobenius", "itakura-saito", "kullback-leibler"] + "values": ["itakura-saito", "frobenius", "kullback-leibler"] }, { "kind": "NamedType", @@ -72194,7 +72194,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Mini-Batch Non-Negative Matrix Factorization (NMF).\n\n.. versionadded:: 1.1\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements,\n(`W`, `H`) whose product approximates the non-negative matrix `X`. This\nfactorization can be used for example for dimensionality reduction, source\nseparation or topic extraction.\n\nThe objective function is:\n\n.. math::\n\nL(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n&+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n&+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n&+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n&+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe objective function is minimized with an alternating minimization of `W`\nand `H`.\n\nNote that the transformed data is named `W` and the components matrix is\nnamed `H`. In the NMF literature, the naming convention is usually the opposite\nsince the data matrix `X` is transposed.\n\nRead more in the :ref:`User Guide `.", + "description": "Mini-Batch Non-Negative Matrix Factorization (NMF).\n\n.. versionadded:: 1.1\n\nFind two non-negative matrices, i.e. 
matrices with all non-negative elements,\n(`W`, `H`) whose product approximates the non-negative matrix `X`. This\nfactorization can be used for example for dimensionality reduction, source\nseparation or topic extraction.\n\nThe objective function is:\n\n .. math::\n\n L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe objective function is minimized with an alternating minimization of `W`\nand `H`.\n\nNote that the transformed data is named `W` and the components matrix is\nnamed `H`. In the NMF literature, the naming convention is usually the opposite\nsince the data matrix `X` is transposed.\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -72927,11 +72927,11 @@ "docstring": { "type": "{'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}", "default_value": "None", - "description": "Method used to initialize the procedure.\nDefault: None.\nValid options:\n\n- `None`: 'nndsvda' if n_components <= min(n_samples, n_features),\notherwise random.\n\n- `'random'`: non-negative random matrices, scaled with:\nsqrt(X.mean() / n_components)\n\n- `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\ninitialization (better for sparseness)\n\n- `'nndsvda'`: NNDSVD with zeros filled with the average of X\n(better when sparsity is not desired)\n\n- `'nndsvdar'` NNDSVD with zeros filled with small random values\n(generally faster, less accurate alternative to NNDSVDa\nfor when sparsity is not desired)\n\n- `'custom'`: use custom matrices W and H\n\n.. versionchanged:: 1.1\nWhen `init=None` and n_components is less than n_samples and n_features\ndefaults to `nndsvda` instead of `nndsvd`." + "description": "Method used to initialize the procedure.\nDefault: None.\nValid options:\n\n- `None`: 'nndsvda' if n_components <= min(n_samples, n_features),\n otherwise random.\n\n- `'random'`: non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n- `'nndsvd'`: Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n- `'nndsvda'`: NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n- `'nndsvdar'` NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n- `'custom'`: use custom matrices W and H\n\n.. versionchanged:: 1.1\n When `init=None` and n_components is less than n_samples and n_features\n defaults to `nndsvda` instead of `nndsvd`." }, "type": { "kind": "EnumType", - "values": ["nndsvdar", "nndsvda", "nndsvd", "custom", "random"] + "values": ["nndsvd", "custom", "nndsvdar", "nndsvda", "random"] } }, { @@ -72944,11 +72944,11 @@ "docstring": { "type": "{'cd', 'mu'}", "default_value": "'cd'", - "description": "Numerical solver to use:\n'cd' is a Coordinate Descent solver.\n'mu' is a Multiplicative Update solver.\n\n.. versionadded:: 0.17\nCoordinate Descent solver.\n\n.. 
versionadded:: 0.19\nMultiplicative Update solver." + "description": "Numerical solver to use:\n'cd' is a Coordinate Descent solver.\n'mu' is a Multiplicative Update solver.\n\n.. versionadded:: 0.17\n Coordinate Descent solver.\n\n.. versionadded:: 0.19\n Multiplicative Update solver." }, "type": { "kind": "EnumType", - "values": ["cd", "mu"] + "values": ["mu", "cd"] } }, { @@ -72968,7 +72968,7 @@ "types": [ { "kind": "EnumType", - "values": ["frobenius", "itakura-saito", "kullback-leibler"] + "values": ["itakura-saito", "frobenius", "kullback-leibler"] }, { "kind": "NamedType", @@ -73051,7 +73051,7 @@ "docstring": { "type": "float", "default_value": "0.0", - "description": "Constant that multiplies the regularization terms. Set it to zero to\nhave no regularization. When using `alpha` instead of `alpha_W` and `alpha_H`,\nthe regularization terms are not scaled by the `n_features` (resp. `n_samples`)\nfactors for `W` (resp. `H`).\n\n.. versionadded:: 0.17\n*alpha* used in the Coordinate Descent solver.\n\n.. deprecated:: 1.0\nThe `alpha` parameter is deprecated in 1.0 and will be removed in 1.2.\nUse `alpha_W` and `alpha_H` instead." + "description": "Constant that multiplies the regularization terms. Set it to zero to\nhave no regularization. When using `alpha` instead of `alpha_W` and `alpha_H`,\nthe regularization terms are not scaled by the `n_features` (resp. `n_samples`)\nfactors for `W` (resp. `H`).\n\n.. versionadded:: 0.17\n *alpha* used in the Coordinate Descent solver.\n\n.. deprecated:: 1.0\n The `alpha` parameter is deprecated in 1.0 and will be removed in 1.2.\n Use `alpha_W` and `alpha_H` instead." }, "type": { "kind": "NamedType", @@ -73111,7 +73111,7 @@ "docstring": { "type": "float", "default_value": "0.0", - "description": "The regularization mixing parameter, with 0 <= l1_ratio <= 1.\nFor l1_ratio = 0 the penalty is an elementwise L2 penalty\n(aka Frobenius Norm).\nFor l1_ratio = 1 it is an elementwise L1 penalty.\nFor 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n.. versionadded:: 0.17\nRegularization parameter *l1_ratio* used in the Coordinate Descent\nsolver." + "description": "The regularization mixing parameter, with 0 <= l1_ratio <= 1.\nFor l1_ratio = 0 the penalty is an elementwise L2 penalty\n(aka Frobenius Norm).\nFor l1_ratio = 1 it is an elementwise L1 penalty.\nFor 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\n.. versionadded:: 0.17\n Regularization parameter *l1_ratio* used in the Coordinate Descent\n solver." }, "type": { "kind": "NamedType", @@ -73145,7 +73145,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "If true, randomize the order of coordinates in the CD solver.\n\n.. versionadded:: 0.17\n*shuffle* parameter used in the Coordinate Descent solver." + "description": "If true, randomize the order of coordinates in the CD solver.\n\n.. versionadded:: 0.17\n *shuffle* parameter used in the Coordinate Descent solver." }, "type": { "kind": "NamedType", @@ -73162,18 +73162,18 @@ "docstring": { "type": "{'both', 'components', 'transformation', None}", "default_value": "'both'", - "description": "Select whether the regularization affects the components (H), the\ntransformation (W), both or none of them.\n\n.. versionadded:: 0.24\n\n.. deprecated:: 1.0\nThe `regularization` parameter is deprecated in 1.0 and will be removed in\n1.2. Use `alpha_W` and `alpha_H` instead." 
+ "description": "Select whether the regularization affects the components (H), the\ntransformation (W), both or none of them.\n\n.. versionadded:: 0.24\n\n.. deprecated:: 1.0\n The `regularization` parameter is deprecated in 1.0 and will be removed in\n 1.2. Use `alpha_W` and `alpha_H` instead." }, "type": { "kind": "EnumType", - "values": ["components", "transformation", "both"] + "values": ["both", "transformation", "components"] } } ], "results": [], "is_public": true, "reexported_by": [], - "description": "Non-Negative Matrix Factorization (NMF).\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements, (W, H)\nwhose product approximates the non-negative matrix X. This factorization can be used\nfor example for dimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n.. math::\n\nL(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n&+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n&+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n&+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n&+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H.\n\nNote that the transformed data is named W and the components matrix is named H. In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.\n\nRead more in the :ref:`User Guide `.", + "description": "Non-Negative Matrix Factorization (NMF).\n\nFind two non-negative matrices, i.e. matrices with all non-negative elements, (W, H)\nwhose product approximates the non-negative matrix X. This factorization can be used\nfor example for dimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n .. math::\n\n L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H.\n\nNote that the transformed data is named W and the components matrix is named H. 
In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -73914,7 +73914,7 @@ "types": [ { "kind": "EnumType", - "values": ["frobenius", "itakura-saito", "kullback-leibler"] + "values": ["itakura-saito", "frobenius", "kullback-leibler"] }, { "kind": "NamedType", @@ -74433,7 +74433,7 @@ "types": [ { "kind": "EnumType", - "values": ["frobenius", "itakura-saito", "kullback-leibler"] + "values": ["itakura-saito", "frobenius", "kullback-leibler"] }, { "kind": "NamedType", @@ -74635,7 +74635,7 @@ "docstring": { "type": "{'random', 'nndsvd', 'nndsvda', 'nndsvdar'}", "default_value": "None", - "description": "Method used to initialize the procedure.\nValid options:\n\n- None: 'nndsvda' if n_components <= min(n_samples, n_features),\notherwise 'random'.\n\n- 'random': non-negative random matrices, scaled with:\nsqrt(X.mean() / n_components)\n\n- 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\ninitialization (better for sparseness)\n\n- 'nndsvda': NNDSVD with zeros filled with the average of X\n(better when sparsity is not desired)\n\n- 'nndsvdar': NNDSVD with zeros filled with small random values\n(generally faster, less accurate alternative to NNDSVDa\nfor when sparsity is not desired)\n\n- 'custom': use custom matrices W and H\n\n.. versionchanged:: 1.1\nWhen `init=None` and n_components is less than n_samples and n_features\ndefaults to `nndsvda` instead of `nndsvd`." + "description": "Method used to initialize the procedure.\nValid options:\n\n- None: 'nndsvda' if n_components <= min(n_samples, n_features),\n otherwise 'random'.\n\n- 'random': non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n- 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n- 'nndsvda': NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n- 'nndsvdar': NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n- 'custom': use custom matrices W and H\n\n.. versionchanged:: 1.1\n When `init=None` and n_components is less than n_samples and n_features\n defaults to `nndsvda` instead of `nndsvd`." }, "type": { "kind": "EnumType", @@ -75266,11 +75266,11 @@ "docstring": { "type": "{'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}", "default_value": "None", - "description": "Method used to initialize the procedure.\n\nValid options:\n\n- None: 'nndsvda' if n_components < n_features, otherwise 'random'.\n\n- 'random': non-negative random matrices, scaled with:\nsqrt(X.mean() / n_components)\n\n- 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\ninitialization (better for sparseness)\n\n- 'nndsvda': NNDSVD with zeros filled with the average of X\n(better when sparsity is not desired)\n\n- 'nndsvdar': NNDSVD with zeros filled with small random values\n(generally faster, less accurate alternative to NNDSVDa\nfor when sparsity is not desired)\n\n- 'custom': use custom matrices W and H if `update_H=True`. If\n`update_H=False`, then only custom matrix H is used.\n\n.. versionchanged:: 0.23\nThe default value of `init` changed from 'random' to None in 0.23.\n\n.. versionchanged:: 1.1\nWhen `init=None` and n_components is less than n_samples and n_features\ndefaults to `nndsvda` instead of `nndsvd`." 
+ "description": "Method used to initialize the procedure.\n\nValid options:\n\n- None: 'nndsvda' if n_components < n_features, otherwise 'random'.\n\n- 'random': non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n- 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n- 'nndsvda': NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n- 'nndsvdar': NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n- 'custom': use custom matrices W and H if `update_H=True`. If\n `update_H=False`, then only custom matrix H is used.\n\n.. versionchanged:: 0.23\n The default value of `init` changed from 'random' to None in 0.23.\n\n.. versionchanged:: 1.1\n When `init=None` and n_components is less than n_samples and n_features\n defaults to `nndsvda` instead of `nndsvd`." }, "type": { "kind": "EnumType", - "values": ["nndsvdar", "nndsvda", "nndsvd", "custom", "random"] + "values": ["nndsvd", "custom", "nndsvdar", "nndsvda", "random"] } }, { @@ -75300,11 +75300,11 @@ "docstring": { "type": "{'cd', 'mu'}", "default_value": "'cd'", - "description": "Numerical solver to use:\n\n- 'cd' is a Coordinate Descent solver that uses Fast Hierarchical\nAlternating Least Squares (Fast HALS).\n\n- 'mu' is a Multiplicative Update solver.\n\n.. versionadded:: 0.17\nCoordinate Descent solver.\n\n.. versionadded:: 0.19\nMultiplicative Update solver." + "description": "Numerical solver to use:\n\n- 'cd' is a Coordinate Descent solver that uses Fast Hierarchical\n Alternating Least Squares (Fast HALS).\n\n- 'mu' is a Multiplicative Update solver.\n\n.. versionadded:: 0.17\n Coordinate Descent solver.\n\n.. versionadded:: 0.19\n Multiplicative Update solver." }, "type": { "kind": "EnumType", - "values": ["cd", "mu"] + "values": ["mu", "cd"] } }, { @@ -75324,7 +75324,7 @@ "types": [ { "kind": "EnumType", - "values": ["frobenius", "itakura-saito", "kullback-leibler"] + "values": ["itakura-saito", "frobenius", "kullback-leibler"] }, { "kind": "NamedType", @@ -75377,7 +75377,7 @@ "docstring": { "type": "float", "default_value": "0.0", - "description": "Constant that multiplies the regularization terms. Set it to zero to have no\nregularization. When using `alpha` instead of `alpha_W` and `alpha_H`, the\nregularization terms are not scaled by the `n_features` (resp. `n_samples`)\nfactors for `W` (resp. `H`).\n\n.. deprecated:: 1.0\nThe `alpha` parameter is deprecated in 1.0 and will be removed in 1.2.\nUse `alpha_W` and `alpha_H` instead." + "description": "Constant that multiplies the regularization terms. Set it to zero to have no\nregularization. When using `alpha` instead of `alpha_W` and `alpha_H`, the\nregularization terms are not scaled by the `n_features` (resp. `n_samples`)\nfactors for `W` (resp. `H`).\n\n.. deprecated:: 1.0\n The `alpha` parameter is deprecated in 1.0 and will be removed in 1.2.\n Use `alpha_W` and `alpha_H` instead." }, "type": { "kind": "NamedType", @@ -75454,11 +75454,11 @@ "docstring": { "type": "{'both', 'components', 'transformation'}", "default_value": "None", - "description": "Select whether the regularization affects the components (H), the\ntransformation (W), both or none of them.\n\n.. deprecated:: 1.0\nThe `regularization` parameter is deprecated in 1.0 and will be removed in\n1.2. Use `alpha_W` and `alpha_H` instead." 
+ "description": "Select whether the regularization affects the components (H), the\ntransformation (W), both or none of them.\n\n.. deprecated:: 1.0\n The `regularization` parameter is deprecated in 1.0 and will be removed in\n 1.2. Use `alpha_W` and `alpha_H` instead." }, "type": { "kind": "EnumType", - "values": ["components", "transformation", "both"] + "values": ["both", "transformation", "components"] } }, { @@ -75529,7 +75529,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.decomposition"], - "description": "Compute Non-negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n.. math::\n\nL(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n&+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n&+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n&+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n&+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H. If H is given and update_H=False, it solves for W only.\n\nNote that the transformed data is named W and the components matrix is named H. In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.", + "description": "Compute Non-negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n .. math::\n\n L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H. If H is given and update_H=False, it solves for W only.\n\nNote that the transformed data is named W and the components matrix is named H. 
In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.", "docstring": "Compute Non-negative Matrix Factorization (NMF).\n\nFind two non-negative matrices (W, H) whose product approximates the non-\nnegative matrix X. This factorization can be used for example for\ndimensionality reduction, source separation or topic extraction.\n\nThe objective function is:\n\n .. math::\n\n L(W, H) &= 0.5 * ||X - WH||_{loss}^2\n\n &+ alpha\\_W * l1\\_ratio * n\\_features * ||vec(W)||_1\n\n &+ alpha\\_H * l1\\_ratio * n\\_samples * ||vec(H)||_1\n\n &+ 0.5 * alpha\\_W * (1 - l1\\_ratio) * n\\_features * ||W||_{Fro}^2\n\n &+ 0.5 * alpha\\_H * (1 - l1\\_ratio) * n\\_samples * ||H||_{Fro}^2\n\n\nWhere:\n\n:math:`||A||_{Fro}^2 = \\sum_{i,j} A_{ij}^2` (Frobenius norm)\n\n:math:`||vec(A)||_1 = \\sum_{i,j} abs(A_{ij})` (Elementwise L1 norm)\n\nThe generic norm :math:`||X - WH||_{loss}^2` may represent\nthe Frobenius norm or another supported beta-divergence loss.\nThe choice between options is controlled by the `beta_loss` parameter.\n\nThe regularization terms are scaled by `n_features` for `W` and by `n_samples` for\n`H` to keep their impact balanced with respect to one another and to the data fit\nterm as independent as possible of the size `n_samples` of the training set.\n\nThe objective function is minimized with an alternating minimization of W\nand H. If H is given and update_H=False, it solves for W only.\n\nNote that the transformed data is named W and the components matrix is named H. In\nthe NMF literature, the naming convention is usually the opposite since the data\nmatrix X is transposed.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Constant matrix.\n\nW : array-like of shape (n_samples, n_components), default=None\n If init='custom', it is used as initial guess for the solution.\n\nH : array-like of shape (n_components, n_features), default=None\n If init='custom', it is used as initial guess for the solution.\n If update_H=False, it is used as a constant, to solve for W only.\n\nn_components : int, default=None\n Number of components, if n_components is not set all features\n are kept.\n\ninit : {'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom'}, default=None\n Method used to initialize the procedure.\n\n Valid options:\n\n - None: 'nndsvda' if n_components < n_features, otherwise 'random'.\n\n - 'random': non-negative random matrices, scaled with:\n sqrt(X.mean() / n_components)\n\n - 'nndsvd': Nonnegative Double Singular Value Decomposition (NNDSVD)\n initialization (better for sparseness)\n\n - 'nndsvda': NNDSVD with zeros filled with the average of X\n (better when sparsity is not desired)\n\n - 'nndsvdar': NNDSVD with zeros filled with small random values\n (generally faster, less accurate alternative to NNDSVDa\n for when sparsity is not desired)\n\n - 'custom': use custom matrices W and H if `update_H=True`. If\n `update_H=False`, then only custom matrix H is used.\n\n .. versionchanged:: 0.23\n The default value of `init` changed from 'random' to None in 0.23.\n\n .. 
versionchanged:: 1.1\n When `init=None` and n_components is less than n_samples and n_features\n defaults to `nndsvda` instead of `nndsvd`.\n\nupdate_H : bool, default=True\n Set to True, both W and H will be estimated from initial guesses.\n Set to False, only W will be estimated.\n\nsolver : {'cd', 'mu'}, default='cd'\n Numerical solver to use:\n\n - 'cd' is a Coordinate Descent solver that uses Fast Hierarchical\n Alternating Least Squares (Fast HALS).\n\n - 'mu' is a Multiplicative Update solver.\n\n .. versionadded:: 0.17\n Coordinate Descent solver.\n\n .. versionadded:: 0.19\n Multiplicative Update solver.\n\nbeta_loss : float or {'frobenius', 'kullback-leibler', 'itakura-saito'}, default='frobenius'\n Beta divergence to be minimized, measuring the distance between X\n and the dot product WH. Note that values different from 'frobenius'\n (or 2) and 'kullback-leibler' (or 1) lead to significantly slower\n fits. Note that for beta_loss <= 0 (or 'itakura-saito'), the input\n matrix X cannot contain zeros. Used only in 'mu' solver.\n\n .. versionadded:: 0.19\n\ntol : float, default=1e-4\n Tolerance of the stopping condition.\n\nmax_iter : int, default=200\n Maximum number of iterations before timing out.\n\nalpha : float, default=0.0\n Constant that multiplies the regularization terms. Set it to zero to have no\n regularization. When using `alpha` instead of `alpha_W` and `alpha_H`, the\n regularization terms are not scaled by the `n_features` (resp. `n_samples`)\n factors for `W` (resp. `H`).\n\n .. deprecated:: 1.0\n The `alpha` parameter is deprecated in 1.0 and will be removed in 1.2.\n Use `alpha_W` and `alpha_H` instead.\n\nalpha_W : float, default=0.0\n Constant that multiplies the regularization terms of `W`. Set it to zero\n (default) to have no regularization on `W`.\n\n .. versionadded:: 1.0\n\nalpha_H : float or \"same\", default=\"same\"\n Constant that multiplies the regularization terms of `H`. Set it to zero to\n have no regularization on `H`. If \"same\" (default), it takes the same value as\n `alpha_W`.\n\n .. versionadded:: 1.0\n\nl1_ratio : float, default=0.0\n The regularization mixing parameter, with 0 <= l1_ratio <= 1.\n For l1_ratio = 0 the penalty is an elementwise L2 penalty\n (aka Frobenius Norm).\n For l1_ratio = 1 it is an elementwise L1 penalty.\n For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.\n\nregularization : {'both', 'components', 'transformation'}, default=None\n Select whether the regularization affects the components (H), the\n transformation (W), both or none of them.\n\n .. deprecated:: 1.0\n The `regularization` parameter is deprecated in 1.0 and will be removed in\n 1.2. Use `alpha_W` and `alpha_H` instead.\n\nrandom_state : int, RandomState instance or None, default=None\n Used for NMF initialisation (when ``init`` == 'nndsvdar' or\n 'random'), and in Coordinate Descent. 
Pass an int for reproducible\n results across multiple function calls.\n See :term:`Glossary `.\n\nverbose : int, default=0\n The verbosity level.\n\nshuffle : bool, default=False\n If true, randomize the order of coordinates in the CD solver.\n\nReturns\n-------\nW : ndarray of shape (n_samples, n_components)\n Solution to the non-negative least squares problem.\n\nH : ndarray of shape (n_components, n_features)\n Solution to the non-negative least squares problem.\n\nn_iter : int\n Actual number of iterations.\n\nExamples\n--------\n>>> import numpy as np\n>>> X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])\n>>> from sklearn.decomposition import non_negative_factorization\n>>> W, H, n_iter = non_negative_factorization(X, n_components=2,\n... init='random', random_state=0)\n\nReferences\n----------\n.. [1] :doi:`\"Fast local algorithms for large scale nonnegative matrix and tensor\n factorizations\" <10.1587/transfun.E92.A.708>`\n Cichocki, Andrzej, and P. H. A. N. Anh-Huy. IEICE transactions on fundamentals\n of electronics, communications and computer sciences 92.3: 708-721, 2009.\n\n.. [2] :doi:`\"Algorithms for nonnegative matrix factorization with the\n beta-divergence\" <10.1162/NECO_a_00168>`\n Fevotte, C., & Idier, J. (2011). Neural Computation, 23(9)." }, { @@ -75639,7 +75639,7 @@ "docstring": { "type": "int, float or 'mle'", "default_value": "None", - "description": "Number of components to keep.\nif n_components is not set all components are kept::\n\nn_components == min(n_samples, n_features)\n\nIf ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's\nMLE is used to guess the dimension. Use of ``n_components == 'mle'``\nwill interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``.\n\nIf ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the\nnumber of components such that the amount of variance that needs to be\nexplained is greater than the percentage specified by n_components.\n\nIf ``svd_solver == 'arpack'``, the number of components must be\nstrictly less than the minimum of n_features and n_samples.\n\nHence, the None case results in::\n\nn_components == min(n_samples, n_features) - 1" + "description": "Number of components to keep.\nif n_components is not set all components are kept::\n\n n_components == min(n_samples, n_features)\n\nIf ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's\nMLE is used to guess the dimension. Use of ``n_components == 'mle'``\nwill interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``.\n\nIf ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the\nnumber of components such that the amount of variance that needs to be\nexplained is greater than the percentage specified by n_components.\n\nIf ``svd_solver == 'arpack'``, the number of components must be\nstrictly less than the minimum of n_features and n_samples.\n\nHence, the None case results in::\n\n n_components == min(n_samples, n_features) - 1" }, "type": { "kind": "UnionType", @@ -75703,11 +75703,11 @@ "docstring": { "type": "{'auto', 'full', 'arpack', 'randomized'}", "default_value": "'auto'", - "description": "If auto :\nThe solver is selected by a default policy based on `X.shape` and\n`n_components`: if the input data is larger than 500x500 and the\nnumber of components to extract is lower than 80% of the smallest\ndimension of the data, then the more efficient 'randomized'\nmethod is enabled. 
Otherwise the exact full SVD is computed and\noptionally truncated afterwards.\nIf full :\nrun exact full SVD calling the standard LAPACK solver via\n`scipy.linalg.svd` and select the components by postprocessing\nIf arpack :\nrun SVD truncated to n_components calling ARPACK solver via\n`scipy.sparse.linalg.svds`. It requires strictly\n0 < n_components < min(X.shape)\nIf randomized :\nrun randomized SVD by the method of Halko et al.\n\n.. versionadded:: 0.18.0" + "description": "If auto :\n The solver is selected by a default policy based on `X.shape` and\n `n_components`: if the input data is larger than 500x500 and the\n number of components to extract is lower than 80% of the smallest\n dimension of the data, then the more efficient 'randomized'\n method is enabled. Otherwise the exact full SVD is computed and\n optionally truncated afterwards.\nIf full :\n run exact full SVD calling the standard LAPACK solver via\n `scipy.linalg.svd` and select the components by postprocessing\nIf arpack :\n run SVD truncated to n_components calling ARPACK solver via\n `scipy.sparse.linalg.svds`. It requires strictly\n 0 < n_components < min(X.shape)\nIf randomized :\n run randomized SVD by the method of Halko et al.\n\n.. versionadded:: 0.18.0" }, "type": { "kind": "EnumType", - "values": ["auto", "full", "randomized", "arpack"] + "values": ["full", "arpack", "randomized", "auto"] } }, { @@ -77582,11 +77582,11 @@ "docstring": { "type": "{'svd', 'lsqr', 'eigen'}", "default_value": "'svd'", - "description": "Solver to use, possible values:\n- 'svd': Singular value decomposition (default).\nDoes not compute the covariance matrix, therefore this solver is\nrecommended for data with a large number of features.\n- 'lsqr': Least squares solution.\nCan be combined with shrinkage or custom covariance estimator.\n- 'eigen': Eigenvalue decomposition.\nCan be combined with shrinkage or custom covariance estimator." + "description": "Solver to use, possible values:\n - 'svd': Singular value decomposition (default).\n Does not compute the covariance matrix, therefore this solver is\n recommended for data with a large number of features.\n - 'lsqr': Least squares solution.\n Can be combined with shrinkage or custom covariance estimator.\n - 'eigen': Eigenvalue decomposition.\n Can be combined with shrinkage or custom covariance estimator." }, "type": { "kind": "EnumType", - "values": ["lsqr", "eigen", "svd"] + "values": ["eigen", "svd", "lsqr"] } }, { @@ -77599,7 +77599,7 @@ "docstring": { "type": "'auto' or float", "default_value": "None", - "description": "Shrinkage parameter, possible values:\n- None: no shrinkage (default).\n- 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n- float between 0 and 1: fixed shrinkage parameter.\n\nThis should be left to None if `covariance_estimator` is used.\nNote that shrinkage works only with 'lsqr' and 'eigen' solvers." + "description": "Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\nThis should be left to None if `covariance_estimator` is used.\nNote that shrinkage works only with 'lsqr' and 'eigen' solvers." 
}, "type": { "kind": "UnionType", @@ -77704,7 +77704,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Linear Discriminant Analysis.\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. versionadded:: 0.17\n*LinearDiscriminantAnalysis*.\n\nRead more in the :ref:`User Guide `.", + "description": "Linear Discriminant Analysis.\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. versionadded:: 0.17\n *LinearDiscriminantAnalysis*.\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -77771,7 +77771,7 @@ "docstring": { "type": "'auto', float or None", "default_value": "", - "description": "Shrinkage parameter, possible values:\n- None: no shrinkage.\n- 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n- float between 0 and 1: fixed shrinkage constant.\n\nShrinkage parameter is ignored if `covariance_estimator` i\nnot None" + "description": "Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage constant.\n\nShrinkage parameter is ignored if `covariance_estimator` i\nnot None" }, "type": { "kind": "UnionType", @@ -77879,7 +77879,7 @@ "docstring": { "type": "'auto', float or None", "default_value": "", - "description": "Shrinkage parameter, possible values:\n- None: no shrinkage.\n- 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n- float between 0 and 1: fixed shrinkage parameter.\n\nShrinkage parameter is ignored if `covariance_estimator` i\nnot None" + "description": "Shrinkage parameter, possible values:\n - None: no shrinkage.\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\nShrinkage parameter is ignored if `covariance_estimator` i\nnot None" }, "type": { "kind": "UnionType", @@ -78086,7 +78086,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Fit the Linear Discriminant Analysis model.\n\n.. versionchanged:: 0.19\n*store_covariance* has been moved to main constructor.\n\n.. versionchanged:: 0.19\n*tol* has been moved to main constructor.", + "description": "Fit the Linear Discriminant Analysis model.\n\n .. versionchanged:: 0.19\n *store_covariance* has been moved to main constructor.\n\n .. versionchanged:: 0.19\n *tol* has been moved to main constructor.", "docstring": "Fit the Linear Discriminant Analysis model.\n\n .. versionchanged:: 0.19\n *store_covariance* has been moved to main constructor.\n\n .. versionchanged:: 0.19\n *tol* has been moved to main constructor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training data.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nReturns\n-------\nself : object\n Fitted estimator." 
}, { @@ -78313,7 +78313,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Quadratic Discriminant Analysis.\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. versionadded:: 0.17\n*QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide `.", + "description": "Quadratic Discriminant Analysis.\n\nA classifier with a quadratic decision boundary, generated\nby fitting class conditional densities to the data\nand using Bayes' rule.\n\nThe model fits a Gaussian density to each class.\n\n.. versionadded:: 0.17\n *QuadraticDiscriminantAnalysis*\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -78459,7 +78459,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Fit the model according to the given training data and parameters.\n\n.. versionchanged:: 0.19\n``store_covariances`` has been moved to main constructor as\n``store_covariance``\n\n.. versionchanged:: 0.19\n``tol`` has been moved to main constructor.", + "description": "Fit the model according to the given training data and parameters.\n\n .. versionchanged:: 0.19\n ``store_covariances`` has been moved to main constructor as\n ``store_covariance``\n\n .. versionchanged:: 0.19\n ``tol`` has been moved to main constructor.", "docstring": "Fit the model according to the given training data and parameters.\n\n .. versionchanged:: 0.19\n ``store_covariances`` has been moved to main constructor as\n ``store_covariance``\n\n .. versionchanged:: 0.19\n ``tol`` has been moved to main constructor.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where `n_samples` is the number of samples and\n `n_features` is the number of features.\n\ny : array-like of shape (n_samples,)\n Target values (integers).\n\nReturns\n-------\nself : object\n Fitted estimator." }, { @@ -78661,7 +78661,7 @@ "docstring": { "type": "'auto' or float", "default_value": "None", - "description": "Shrinkage parameter, possible values:\n- None: no shrinkage (default).\n- 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n- float between 0 and 1: fixed shrinkage parameter.\n\nShrinkage parameter is ignored if `covariance_estimator` is not None." + "description": "Shrinkage parameter, possible values:\n - None: no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\nShrinkage parameter is ignored if `covariance_estimator` is not None."
}, "type": { "kind": "UnionType", @@ -78800,7 +78800,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Estimate covariance matrix (using optional covariance_estimator).\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\nInput data.\n\nshrinkage : {'empirical', 'auto'} or float, default=None\nShrinkage parameter, possible values:\n- None or 'empirical': no shrinkage (default).\n- 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n- float between 0 and 1: fixed shrinkage parameter.\n\nShrinkage parameter is ignored if `covariance_estimator`\nis not None.\n\ncovariance_estimator : estimator, default=None\nIf not None, `covariance_estimator` is used to estimate\nthe covariance matrices instead of relying on the empirical\ncovariance estimator (with potential shrinkage).\nThe object should have a fit method and a ``covariance_`` attribute\nlike the estimators in :mod:`sklearn.covariance``.\nif None the shrinkage parameter drives the estimate.\n\n.. versionadded:: 0.24", + "description": "Estimate covariance matrix (using optional covariance_estimator).\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nshrinkage : {'empirical', 'auto'} or float, default=None\n Shrinkage parameter, possible values:\n - None or 'empirical': no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator`\n is not None.\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in :mod:`sklearn.covariance``.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24", "docstring": "Estimate covariance matrix (using optional covariance_estimator).\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Input data.\n\nshrinkage : {'empirical', 'auto'} or float, default=None\n Shrinkage parameter, possible values:\n - None or 'empirical': no shrinkage (default).\n - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n - float between 0 and 1: fixed shrinkage parameter.\n\n Shrinkage parameter is ignored if `covariance_estimator`\n is not None.\n\ncovariance_estimator : estimator, default=None\n If not None, `covariance_estimator` is used to estimate\n the covariance matrices instead of relying on the empirical\n covariance estimator (with potential shrinkage).\n The object should have a fit method and a ``covariance_`` attribute\n like the estimators in :mod:`sklearn.covariance``.\n if None the shrinkage parameter drives the estimate.\n\n .. versionadded:: 0.24\n\nReturns\n-------\ns : ndarray of shape (n_features, n_features)\n Estimated covariance matrix."
}, { @@ -78833,11 +78833,11 @@ "docstring": { "type": "{\"most_frequent\", \"prior\", \"stratified\", \"uniform\", \"constant\"}", "default_value": "\"prior\"", - "description": "Strategy to use to generate predictions.\n\n* \"most_frequent\": the `predict` method always returns the most\nfrequent class label in the observed `y` argument passed to `fit`.\nThe `predict_proba` method returns the matching one-hot encoded\nvector.\n* \"prior\": the `predict` method always returns the most frequent\nclass label in the observed `y` argument passed to `fit` (like\n\"most_frequent\"). ``predict_proba`` always returns the empirical\nclass distribution of `y` also known as the empirical class prior\ndistribution.\n* \"stratified\": the `predict_proba` method randomly samples one-hot\nvectors from a multinomial distribution parametrized by the empirical\nclass prior probabilities.\nThe `predict` method returns the class label which got probability\none in the one-hot vector of `predict_proba`.\nEach sampled row of both methods is therefore independent and\nidentically distributed.\n* \"uniform\": generates predictions uniformly at random from the list\nof unique classes observed in `y`, i.e. each class has equal\nprobability.\n* \"constant\": always predicts a constant label that is provided by\nthe user. This is useful for metrics that evaluate a non-majority\nclass.\n\n.. versionchanged:: 0.24\nThe default value of `strategy` has changed to \"prior\" in version\n0.24." + "description": "Strategy to use to generate predictions.\n\n* \"most_frequent\": the `predict` method always returns the most\n frequent class label in the observed `y` argument passed to `fit`.\n The `predict_proba` method returns the matching one-hot encoded\n vector.\n* \"prior\": the `predict` method always returns the most frequent\n class label in the observed `y` argument passed to `fit` (like\n \"most_frequent\"). ``predict_proba`` always returns the empirical\n class distribution of `y` also known as the empirical class prior\n distribution.\n* \"stratified\": the `predict_proba` method randomly samples one-hot\n vectors from a multinomial distribution parametrized by the empirical\n class prior probabilities.\n The `predict` method returns the class label which got probability\n one in the one-hot vector of `predict_proba`.\n Each sampled row of both methods is therefore independent and\n identically distributed.\n* \"uniform\": generates predictions uniformly at random from the list\n of unique classes observed in `y`, i.e. each class has equal\n probability.\n* \"constant\": always predicts a constant label that is provided by\n the user. This is useful for metrics that evaluate a non-majority\n class.\n\n .. versionchanged:: 0.24\n The default value of `strategy` has changed to \"prior\" in version\n 0.24." }, "type": { "kind": "EnumType", - "values": ["constant", "prior", "stratified", "uniform", "most_frequent"] + "values": ["uniform", "prior", "stratified", "constant", "most_frequent"] } }, { @@ -79291,11 +79291,11 @@ "docstring": { "type": "{\"mean\", \"median\", \"quantile\", \"constant\"}", "default_value": "\"mean\"", - "description": "Strategy to use to generate predictions.\n\n* \"mean\": always predicts the mean of the training set\n* \"median\": always predicts the median of the training set\n* \"quantile\": always predicts a specified quantile of the training set,\nprovided with the quantile parameter.\n* \"constant\": always predicts a constant value that is provided by\nthe user." 
+ "description": "Strategy to use to generate predictions.\n\n* \"mean\": always predicts the mean of the training set\n* \"median\": always predicts the median of the training set\n* \"quantile\": always predicts a specified quantile of the training set,\n provided with the quantile parameter.\n* \"constant\": always predicts a constant value that is provided by\n the user." }, "type": { "kind": "EnumType", - "values": ["median", "constant", "mean", "quantile"] + "values": ["constant", "quantile", "median", "mean"] } }, { @@ -79811,7 +79811,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "When set to True, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit\na whole new ensemble. See :term:`the Glossary `.\n\n.. versionadded:: 0.17\n*warm_start* constructor parameter." + "description": "When set to True, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit\na whole new ensemble. See :term:`the Glossary `.\n\n.. versionadded:: 0.17\n *warm_start* constructor parameter." }, "type": { "kind": "NamedType", @@ -80929,7 +80929,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Build a Bagging ensemble of estimators from the training\nset (X, y).", + "description": "Build a Bagging ensemble of estimators from the training\n set (X, y).", "docstring": "Build a Bagging ensemble of estimators from the training\n set (X, y).\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The training input samples. Sparse matrices are accepted only if\n they are supported by the base estimator.\n\ny : array-like of shape (n_samples,)\n The target values (class labels in classification, real numbers in\n regression).\n\nmax_samples : int or float, default=None\n Argument to use instead of self.max_samples.\n\nmax_depth : int, default=None\n Override value used when constructing base estimator. Only\n supported if the base estimator has a max_depth parameter.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights. If None, then samples are equally weighted.\n Note that this is supported only if the base estimator supports\n sample weighting.\n\ncheck_input : bool, default=True\n Override value used when fitting base estimator. Only supported\n if the base estimator has a check_input parameter for fit function.\n\nReturns\n-------\nself : object\n Fitted estimator." }, { @@ -83138,7 +83138,7 @@ "docstring": { "type": "int", "default_value": "100", - "description": "The number of trees in the forest.\n\n.. versionchanged:: 0.22\nThe default value of ``n_estimators`` changed from 10 to 100\nin 0.22." + "description": "The number of trees in the forest.\n\n.. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22." }, "type": { "kind": "NamedType", @@ -83159,7 +83159,7 @@ }, "type": { "kind": "EnumType", - "values": ["entropy", "log_loss", "gini"] + "values": ["entropy", "gini", "log_loss"] } }, { @@ -83189,7 +83189,7 @@ "docstring": { "type": "int or float", "default_value": "2", - "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n`ceil(min_samples_split * n_samples)` are the minimum\nnumber of samples for each split.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." 
+ "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -83215,7 +83215,7 @@ "docstring": { "type": "int or float", "default_value": "1", - "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n`ceil(min_samples_leaf * n_samples)` are the minimum\nnumber of samples for each node.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -83258,14 +83258,14 @@ "docstring": { "type": "{\"sqrt\", \"log2\", None}, int or float", "default_value": "\"sqrt\"", - "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n`round(max_features * n_features)` features are considered at each\nsplit.\n- If \"auto\", then `max_features=sqrt(n_features)`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\n.. versionchanged:: 1.1\nThe default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n.. deprecated:: 1.1\nThe `\"auto\"` option was deprecated in 1.1 and will be removed\nin 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n- If \"auto\", then `max_features=sqrt(n_features)`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\n.. versionchanged:: 1.1\n The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n.. 
deprecated:: 1.1\n The `\"auto\"` option was deprecated in 1.1 and will be removed\n in 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["sqrt", "log2"] + "values": ["log2", "sqrt"] }, { "kind": "NamedType", @@ -83305,7 +83305,7 @@ "docstring": { "type": "float", "default_value": "0.0", - "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\nN_t / N * (impurity - N_t_R / N_t * right_impurity\n- N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" }, "type": { "kind": "NamedType", @@ -83373,7 +83373,7 @@ "docstring": { "type": "int, RandomState instance or None", "default_value": "None", - "description": "Controls 3 sources of randomness:\n\n- the bootstrapping of the samples used when building trees\n(if ``bootstrap=True``)\n- the sampling of the features to consider when looking for the best\nsplit at each node (if ``max_features < n_features``)\n- the draw of the splits for each of the `max_features`\n\nSee :term:`Glossary ` for details." + "description": "Controls 3 sources of randomness:\n\n- the bootstrapping of the samples used when building trees\n (if ``bootstrap=True``)\n- the sampling of the features to consider when looking for the best\n split at each node (if ``max_features < n_features``)\n- the draw of the splits for each of the `max_features`\n\nSee :term:`Glossary ` for details." }, "type": { "kind": "UnionType", @@ -83484,7 +83484,7 @@ "docstring": { "type": "int or float", "default_value": "None", - "description": "If bootstrap is True, the number of samples to draw from X\nto train each base estimator.\n\n- If None (default), then draw `X.shape[0]` samples.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples. Thus,\n`max_samples` should be in the interval `(0.0, 1.0]`.\n\n.. versionadded:: 0.22" + "description": "If bootstrap is True, the number of samples to draw from X\nto train each base estimator.\n\n- If None (default), then draw `X.shape[0]` samples.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0.0, 1.0]`.\n\n.. 
versionadded:: 0.22" }, "type": { "kind": "UnionType", @@ -83545,7 +83545,7 @@ "docstring": { "type": "int", "default_value": "100", - "description": "The number of trees in the forest.\n\n.. versionchanged:: 0.22\nThe default value of ``n_estimators`` changed from 10 to 100\nin 0.22." + "description": "The number of trees in the forest.\n\n.. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22." }, "type": { "kind": "NamedType", @@ -83562,11 +83562,11 @@ "docstring": { "type": "{\"squared_error\", \"absolute_error\"}", "default_value": "\"squared_error\"", - "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion, and \"absolute_error\"\nfor the mean absolute error.\n\n.. versionadded:: 0.18\nMean Absolute Error (MAE) criterion.\n\n.. deprecated:: 1.0\nCriterion \"mse\" was deprecated in v1.0 and will be removed in\nversion 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n.. deprecated:: 1.0\nCriterion \"mae\" was deprecated in v1.0 and will be removed in\nversion 1.2. Use `criterion=\"absolute_error\"` which is equivalent." + "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion, and \"absolute_error\"\nfor the mean absolute error.\n\n.. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n.. deprecated:: 1.0\n Criterion \"mse\" was deprecated in v1.0 and will be removed in\n version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n.. deprecated:: 1.0\n Criterion \"mae\" was deprecated in v1.0 and will be removed in\n version 1.2. Use `criterion=\"absolute_error\"` which is equivalent." }, "type": { "kind": "EnumType", - "values": ["absolute_error", "squared_error"] + "values": ["squared_error", "absolute_error"] } }, { @@ -83596,7 +83596,7 @@ "docstring": { "type": "int or float", "default_value": "2", - "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n`ceil(min_samples_split * n_samples)` are the minimum\nnumber of samples for each split.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -83622,7 +83622,7 @@ "docstring": { "type": "int or float", "default_value": "1", - "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n`ceil(min_samples_leaf * n_samples)` are the minimum\nnumber of samples for each node.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." 
+ "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -83665,14 +83665,14 @@ "docstring": { "type": "{\"sqrt\", \"log2\", None}, int or float", "default_value": "1.0", - "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n`round(max_features * n_features)` features are considered at each\nsplit.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None or 1.0, then `max_features=n_features`.\n\n.. note::\nThe default of 1.0 is equivalent to bagged trees and more\nrandomness can be achieved by setting smaller values, e.g. 0.3.\n\n.. versionchanged:: 1.1\nThe default of `max_features` changed from `\"auto\"` to 1.0.\n\n.. deprecated:: 1.1\nThe `\"auto\"` option was deprecated in 1.1 and will be removed\nin 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None or 1.0, then `max_features=n_features`.\n\n.. note::\n The default of 1.0 is equivalent to bagged trees and more\n randomness can be achieved by setting smaller values, e.g. 0.3.\n\n.. versionchanged:: 1.1\n The default of `max_features` changed from `\"auto\"` to 1.0.\n\n.. deprecated:: 1.1\n The `\"auto\"` option was deprecated in 1.1 and will be removed\n in 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." 
}, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["sqrt", "log2"] + "values": ["log2", "sqrt"] }, { "kind": "NamedType", @@ -83712,7 +83712,7 @@ "docstring": { "type": "float", "default_value": "0.0", - "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\nN_t / N * (impurity - N_t_R / N_t * right_impurity\n- N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" }, "type": { "kind": "NamedType", @@ -83780,7 +83780,7 @@ "docstring": { "type": "int, RandomState instance or None", "default_value": "None", - "description": "Controls 3 sources of randomness:\n\n- the bootstrapping of the samples used when building trees\n(if ``bootstrap=True``)\n- the sampling of the features to consider when looking for the best\nsplit at each node (if ``max_features < n_features``)\n- the draw of the splits for each of the `max_features`\n\nSee :term:`Glossary ` for details." + "description": "Controls 3 sources of randomness:\n\n- the bootstrapping of the samples used when building trees\n (if ``bootstrap=True``)\n- the sampling of the features to consider when looking for the best\n split at each node (if ``max_features < n_features``)\n- the draw of the splits for each of the `max_features`\n\nSee :term:`Glossary ` for details." }, "type": { "kind": "UnionType", @@ -83861,7 +83861,7 @@ "docstring": { "type": "int or float", "default_value": "None", - "description": "If bootstrap is True, the number of samples to draw from X\nto train each base estimator.\n\n- If None (default), then draw `X.shape[0]` samples.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples. Thus,\n`max_samples` should be in the interval `(0.0, 1.0]`.\n\n.. versionadded:: 0.22" + "description": "If bootstrap is True, the number of samples to draw from X\nto train each base estimator.\n\n- If None (default), then draw `X.shape[0]` samples.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0.0, 1.0]`.\n\n.. versionadded:: 0.22" }, "type": { "kind": "UnionType", @@ -84854,7 +84854,7 @@ "docstring": { "type": "int", "default_value": "100", - "description": "The number of trees in the forest.\n\n.. versionchanged:: 0.22\nThe default value of ``n_estimators`` changed from 10 to 100\nin 0.22." + "description": "The number of trees in the forest.\n\n.. 
versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22." }, "type": { "kind": "NamedType", @@ -84875,7 +84875,7 @@ }, "type": { "kind": "EnumType", - "values": ["entropy", "log_loss", "gini"] + "values": ["entropy", "gini", "log_loss"] } }, { @@ -84905,7 +84905,7 @@ "docstring": { "type": "int or float", "default_value": "2", - "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n`ceil(min_samples_split * n_samples)` are the minimum\nnumber of samples for each split.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -84931,7 +84931,7 @@ "docstring": { "type": "int or float", "default_value": "1", - "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n`ceil(min_samples_leaf * n_samples)` are the minimum\nnumber of samples for each node.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -84974,14 +84974,14 @@ "docstring": { "type": "{\"sqrt\", \"log2\", None}, int or float", "default_value": "\"sqrt\"", - "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n`round(max_features * n_features)` features are considered at each\nsplit.\n- If \"auto\", then `max_features=sqrt(n_features)`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\n.. versionchanged:: 1.1\nThe default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n.. deprecated:: 1.1\nThe `\"auto\"` option was deprecated in 1.1 and will be removed\nin 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." 
+ "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n- If \"auto\", then `max_features=sqrt(n_features)`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\n.. versionchanged:: 1.1\n The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n.. deprecated:: 1.1\n The `\"auto\"` option was deprecated in 1.1 and will be removed\n in 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["sqrt", "log2"] + "values": ["log2", "sqrt"] }, { "kind": "NamedType", @@ -85021,7 +85021,7 @@ "docstring": { "type": "float", "default_value": "0.0", - "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\nN_t / N * (impurity - N_t_R / N_t * right_impurity\n- N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" }, "type": { "kind": "NamedType", @@ -85200,7 +85200,7 @@ "docstring": { "type": "int or float", "default_value": "None", - "description": "If bootstrap is True, the number of samples to draw from X\nto train each base estimator.\n\n- If None (default), then draw `X.shape[0]` samples.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples. Thus,\n`max_samples` should be in the interval `(0.0, 1.0]`.\n\n.. versionadded:: 0.22" + "description": "If bootstrap is True, the number of samples to draw from X\nto train each base estimator.\n\n- If None (default), then draw `X.shape[0]` samples.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0.0, 1.0]`.\n\n.. versionadded:: 0.22" }, "type": { "kind": "UnionType", @@ -85261,7 +85261,7 @@ "docstring": { "type": "int", "default_value": "100", - "description": "The number of trees in the forest.\n\n.. versionchanged:: 0.22\nThe default value of ``n_estimators`` changed from 10 to 100\nin 0.22." + "description": "The number of trees in the forest.\n\n.. 
versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22." }, "type": { "kind": "NamedType", @@ -85278,11 +85278,11 @@ "docstring": { "type": "{\"squared_error\", \"absolute_error\", \"poisson\"}", "default_value": "\"squared_error\"", - "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion, \"absolute_error\"\nfor the mean absolute error, and \"poisson\" which uses reduction in\nPoisson deviance to find splits.\nTraining using \"absolute_error\" is significantly slower\nthan when using \"squared_error\".\n\n.. versionadded:: 0.18\nMean Absolute Error (MAE) criterion.\n\n.. versionadded:: 1.0\nPoisson criterion.\n\n.. deprecated:: 1.0\nCriterion \"mse\" was deprecated in v1.0 and will be removed in\nversion 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n.. deprecated:: 1.0\nCriterion \"mae\" was deprecated in v1.0 and will be removed in\nversion 1.2. Use `criterion=\"absolute_error\"` which is equivalent." + "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion, \"absolute_error\"\nfor the mean absolute error, and \"poisson\" which uses reduction in\nPoisson deviance to find splits.\nTraining using \"absolute_error\" is significantly slower\nthan when using \"squared_error\".\n\n.. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n.. versionadded:: 1.0\n Poisson criterion.\n\n.. deprecated:: 1.0\n Criterion \"mse\" was deprecated in v1.0 and will be removed in\n version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n.. deprecated:: 1.0\n Criterion \"mae\" was deprecated in v1.0 and will be removed in\n version 1.2. Use `criterion=\"absolute_error\"` which is equivalent." }, "type": { "kind": "EnumType", - "values": ["absolute_error", "poisson", "squared_error"] + "values": ["squared_error", "absolute_error", "poisson"] } }, { @@ -85312,7 +85312,7 @@ "docstring": { "type": "int or float", "default_value": "2", - "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n`ceil(min_samples_split * n_samples)` are the minimum\nnumber of samples for each split.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -85338,7 +85338,7 @@ "docstring": { "type": "int or float", "default_value": "1", - "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. 
This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n`ceil(min_samples_leaf * n_samples)` are the minimum\nnumber of samples for each node.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -85381,14 +85381,14 @@ "docstring": { "type": "{\"sqrt\", \"log2\", None}, int or float", "default_value": "1.0", - "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n`round(max_features * n_features)` features are considered at each\nsplit.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None or 1.0, then `max_features=n_features`.\n\n.. note::\nThe default of 1.0 is equivalent to bagged trees and more\nrandomness can be achieved by setting smaller values, e.g. 0.3.\n\n.. versionchanged:: 1.1\nThe default of `max_features` changed from `\"auto\"` to 1.0.\n\n.. deprecated:: 1.1\nThe `\"auto\"` option was deprecated in 1.1 and will be removed\nin 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n `round(max_features * n_features)` features are considered at each\n split.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None or 1.0, then `max_features=n_features`.\n\n.. note::\n The default of 1.0 is equivalent to bagged trees and more\n randomness can be achieved by setting smaller values, e.g. 0.3.\n\n.. versionchanged:: 1.1\n The default of `max_features` changed from `\"auto\"` to 1.0.\n\n.. deprecated:: 1.1\n The `\"auto\"` option was deprecated in 1.1 and will be removed\n in 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." 
}, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["sqrt", "log2"] + "values": ["log2", "sqrt"] }, { "kind": "NamedType", @@ -85428,7 +85428,7 @@ "docstring": { "type": "float", "default_value": "0.0", - "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\nN_t / N * (impurity - N_t_R / N_t * right_impurity\n- N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" }, "type": { "kind": "NamedType", @@ -85577,7 +85577,7 @@ "docstring": { "type": "int or float", "default_value": "None", - "description": "If bootstrap is True, the number of samples to draw from X\nto train each base estimator.\n\n- If None (default), then draw `X.shape[0]` samples.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples. Thus,\n`max_samples` should be in the interval `(0.0, 1.0]`.\n\n.. versionadded:: 0.22" + "description": "If bootstrap is True, the number of samples to draw from X\nto train each base estimator.\n\n- If None (default), then draw `X.shape[0]` samples.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples. Thus,\n `max_samples` should be in the interval `(0.0, 1.0]`.\n\n.. versionadded:: 0.22" }, "type": { "kind": "UnionType", @@ -85638,7 +85638,7 @@ "docstring": { "type": "int", "default_value": "100", - "description": "Number of trees in the forest.\n\n.. versionchanged:: 0.22\nThe default value of ``n_estimators`` changed from 10 to 100\nin 0.22." + "description": "Number of trees in the forest.\n\n.. versionchanged:: 0.22\n The default value of ``n_estimators`` changed from 10 to 100\n in 0.22." }, "type": { "kind": "NamedType", @@ -85672,7 +85672,7 @@ "docstring": { "type": "int or float", "default_value": "2", - "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n`ceil(min_samples_split * n_samples)` is the minimum\nnumber of samples for each split.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` is the minimum\n number of samples for each split.\n\n.. 
versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -85698,7 +85698,7 @@ "docstring": { "type": "int or float", "default_value": "1", - "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n`ceil(min_samples_leaf * n_samples)` is the minimum\nnumber of samples for each node.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` is the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -85758,7 +85758,7 @@ "docstring": { "type": "float", "default_value": "0.0", - "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\nN_t / N * (impurity - N_t_R / N_t * right_impurity\n- N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" }, "type": { "kind": "NamedType", @@ -86417,7 +86417,7 @@ "docstring": { "type": "int or float", "default_value": "", - "description": "The maximum number of samples to draw from the total available:\n- if float, this indicates a fraction of the total and should be\nthe interval `(0.0, 1.0]`;\n- if int, this indicates the exact number of samples;\n- if None, this indicates the total number of samples." + "description": "The maximum number of samples to draw from the total available:\n - if float, this indicates a fraction of the total and should be\n the interval `(0.0, 1.0]`;\n - if int, this indicates the exact number of samples;\n - if None, this indicates the total number of samples." 
}, "type": { "kind": "UnionType", @@ -87979,7 +87979,7 @@ "docstring": { "type": "{'log_loss', 'deviance', 'exponential'}", "default_value": "'log_loss'", - "description": "The loss function to be optimized. 'log_loss' refers to binomial and\nmultinomial deviance, the same as used in logistic regression.\nIt is a good choice for classification with probabilistic outputs.\nFor loss 'exponential', gradient boosting recovers the AdaBoost algorithm.\n\n.. deprecated:: 1.1\nThe loss 'deviance' was deprecated in v1.1 and will be removed in\nversion 1.3. Use `loss='log_loss'` which is equivalent." + "description": "The loss function to be optimized. 'log_loss' refers to binomial and\nmultinomial deviance, the same as used in logistic regression.\nIt is a good choice for classification with probabilistic outputs.\nFor loss 'exponential', gradient boosting recovers the AdaBoost algorithm.\n\n.. deprecated:: 1.1\n The loss 'deviance' was deprecated in v1.1 and will be removed in\n version 1.3. Use `loss='log_loss'` which is equivalent." }, "type": { "kind": "EnumType", @@ -88060,11 +88060,11 @@ "docstring": { "type": "{'friedman_mse', 'squared_error', 'mse'}", "default_value": "'friedman_mse'", - "description": "The function to measure the quality of a split. Supported criteria are\n'friedman_mse' for the mean squared error with improvement score by\nFriedman, 'squared_error' for mean squared error. The default value of\n'friedman_mse' is generally the best as it can provide a better\napproximation in some cases.\n\n.. versionadded:: 0.18\n\n.. deprecated:: 1.0\nCriterion 'mse' was deprecated in v1.0 and will be removed in\nversion 1.2. Use `criterion='squared_error'` which is equivalent." + "description": "The function to measure the quality of a split. Supported criteria are\n'friedman_mse' for the mean squared error with improvement score by\nFriedman, 'squared_error' for mean squared error. The default value of\n'friedman_mse' is generally the best as it can provide a better\napproximation in some cases.\n\n.. versionadded:: 0.18\n\n.. deprecated:: 1.0\n Criterion 'mse' was deprecated in v1.0 and will be removed in\n version 1.2. Use `criterion='squared_error'` which is equivalent." }, "type": { "kind": "EnumType", - "values": ["mse", "squared_error", "friedman_mse"] + "values": ["squared_error", "friedman_mse", "mse"] } }, { @@ -88077,7 +88077,7 @@ "docstring": { "type": "int or float", "default_value": "2", - "description": "The minimum number of samples required to split an internal node:\n\n- If int, values must be in the range `[2, inf)`.\n- If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`\nwill be `ceil(min_samples_split * n_samples)`.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to split an internal node:\n\n- If int, values must be in the range `[2, inf)`.\n- If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`\n will be `ceil(min_samples_split * n_samples)`.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -88111,7 +88111,7 @@ "docstring": { "type": "int or float", "default_value": "1", - "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. 
This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, values must be in the range `[1, inf)`.\n- If float, values must be in the range `(0.0, 1.0]` and `min_samples_leaf`\nwill be `ceil(min_samples_leaf * n_samples)`.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, values must be in the range `[1, inf)`.\n- If float, values must be in the range `(0.0, 1.0]` and `min_samples_leaf`\n will be `ceil(min_samples_leaf * n_samples)`.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -88192,7 +88192,7 @@ "docstring": { "type": "float", "default_value": "0.0", - "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\nValues must be in the range `[0.0, inf)`.\n\nThe weighted impurity decrease equation is the following::\n\nN_t / N * (impurity - N_t_R / N_t * right_impurity\n- N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\nValues must be in the range `[0.0, inf)`.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" }, "type": { "kind": "NamedType", @@ -88265,7 +88265,7 @@ "docstring": { "type": "{'auto', 'sqrt', 'log2'}, int or float", "default_value": "None", - "description": "The number of features to consider when looking for the best split:\n\n- If int, values must be in the range `[1, inf)`.\n- If float, values must be in the range `(0.0, 1.0]` and the features\nconsidered at each split will be `int(max_features * n_features)`.\n- If 'auto', then `max_features=sqrt(n_features)`.\n- If 'sqrt', then `max_features=sqrt(n_features)`.\n- If 'log2', then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nChoosing `max_features < n_features` leads to a reduction of variance\nand an increase in bias.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." 
+ "description": "The number of features to consider when looking for the best split:\n\n- If int, values must be in the range `[1, inf)`.\n- If float, values must be in the range `(0.0, 1.0]` and the features\n considered at each split will be `int(max_features * n_features)`.\n- If 'auto', then `max_features=sqrt(n_features)`.\n- If 'sqrt', then `max_features=sqrt(n_features)`.\n- If 'log2', then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nChoosing `max_features < n_features` leads to a reduction of variance\nand an increase in bias.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." }, "type": { "kind": "UnionType", @@ -88280,7 +88280,7 @@ }, { "kind": "EnumType", - "values": ["auto", "sqrt", "log2"] + "values": ["log2", "sqrt", "auto"] }, { "kind": "NamedType", @@ -88888,11 +88888,11 @@ "docstring": { "type": "{'squared_error', 'absolute_error', 'huber', 'quantile'}", "default_value": "'squared_error'", - "description": "Loss function to be optimized. 'squared_error' refers to the squared\nerror for regression. 'absolute_error' refers to the absolute error of\nregression and is a robust loss function. 'huber' is a\ncombination of the two. 'quantile' allows quantile regression (use\n`alpha` to specify the quantile).\n\n.. deprecated:: 1.0\nThe loss 'ls' was deprecated in v1.0 and will be removed in\nversion 1.2. Use `loss='squared_error'` which is equivalent.\n\n.. deprecated:: 1.0\nThe loss 'lad' was deprecated in v1.0 and will be removed in\nversion 1.2. Use `loss='absolute_error'` which is equivalent." + "description": "Loss function to be optimized. 'squared_error' refers to the squared\nerror for regression. 'absolute_error' refers to the absolute error of\nregression and is a robust loss function. 'huber' is a\ncombination of the two. 'quantile' allows quantile regression (use\n`alpha` to specify the quantile).\n\n.. deprecated:: 1.0\n The loss 'ls' was deprecated in v1.0 and will be removed in\n version 1.2. Use `loss='squared_error'` which is equivalent.\n\n.. deprecated:: 1.0\n The loss 'lad' was deprecated in v1.0 and will be removed in\n version 1.2. Use `loss='absolute_error'` which is equivalent." }, "type": { "kind": "EnumType", - "values": ["absolute_error", "huber", "squared_error", "quantile"] + "values": ["squared_error", "huber", "quantile", "absolute_error"] } }, { @@ -88969,11 +88969,11 @@ "docstring": { "type": "{'friedman_mse', 'squared_error', 'mse'}", "default_value": "'friedman_mse'", - "description": "The function to measure the quality of a split. Supported criteria are\n\"friedman_mse\" for the mean squared error with improvement score by\nFriedman, \"squared_error\" for mean squared error. The default value of\n\"friedman_mse\" is generally the best as it can provide a better\napproximation in some cases.\n\n.. versionadded:: 0.18\n\n.. deprecated:: 1.0\nCriterion 'mse' was deprecated in v1.0 and will be removed in\nversion 1.2. Use `criterion='squared_error'` which is equivalent." + "description": "The function to measure the quality of a split. Supported criteria are\n\"friedman_mse\" for the mean squared error with improvement score by\nFriedman, \"squared_error\" for mean squared error. The default value of\n\"friedman_mse\" is generally the best as it can provide a better\napproximation in some cases.\n\n.. versionadded:: 0.18\n\n.. 
deprecated:: 1.0\n Criterion 'mse' was deprecated in v1.0 and will be removed in\n version 1.2. Use `criterion='squared_error'` which is equivalent." }, "type": { "kind": "EnumType", - "values": ["mse", "squared_error", "friedman_mse"] + "values": ["squared_error", "friedman_mse", "mse"] } }, { @@ -88986,7 +88986,7 @@ "docstring": { "type": "int or float", "default_value": "2", - "description": "The minimum number of samples required to split an internal node:\n\n- If int, values must be in the range `[2, inf)`.\n- If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`\nwill be `ceil(min_samples_split * n_samples)`.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to split an internal node:\n\n- If int, values must be in the range `[2, inf)`.\n- If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`\n will be `ceil(min_samples_split * n_samples)`.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -89020,7 +89020,7 @@ "docstring": { "type": "int or float", "default_value": "1", - "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, values must be in the range `[1, inf)`.\n- If float, values must be in the range `(0.0, 1.0]` and `min_samples_leaf`\nwill be `ceil(min_samples_leaf * n_samples)`.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, values must be in the range `[1, inf)`.\n- If float, values must be in the range `(0.0, 1.0]` and `min_samples_leaf`\n will be `ceil(min_samples_leaf * n_samples)`.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -89101,7 +89101,7 @@ "docstring": { "type": "float", "default_value": "0.0", - "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\nValues must be in the range `[0.0, inf)`.\n\nThe weighted impurity decrease equation is the following::\n\nN_t / N * (impurity - N_t_R / N_t * right_impurity\n- N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. 
versionadded:: 0.19" + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\nValues must be in the range `[0.0, inf)`.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" }, "type": { "kind": "NamedType", @@ -89174,7 +89174,7 @@ "docstring": { "type": "{'auto', 'sqrt', 'log2'}, int or float", "default_value": "None", - "description": "The number of features to consider when looking for the best split:\n\n- If int, values must be in the range `[1, inf)`.\n- If float, values must be in the range `(0.0, 1.0]` and the features\nconsidered at each split will be `int(max_features * n_features)`.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nChoosing `max_features < n_features` leads to a reduction of variance\nand an increase in bias.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + "description": "The number of features to consider when looking for the best split:\n\n- If int, values must be in the range `[1, inf)`.\n- If float, values must be in the range `(0.0, 1.0]` and the features\n considered at each split will be `int(max_features * n_features)`.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nChoosing `max_features < n_features` leads to a reduction of variance\nand an increase in bias.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." }, "type": { "kind": "UnionType", @@ -89189,7 +89189,7 @@ }, { "kind": "EnumType", - "values": ["auto", "sqrt", "log2"] + "values": ["log2", "sqrt", "auto"] }, { "kind": "NamedType", @@ -90091,7 +90091,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Make a single Newton-Raphson step.\n\nour node estimate is given by:\n\nsum(w * (y - prob)) / sum(w * prob * (1 - prob))\n\nwe take advantage that: y - prob = residual", + "description": "Make a single Newton-Raphson step.\n\nour node estimate is given by:\n\n sum(w * (y - prob)) / sum(w * prob * (1 - prob))\n\nwe take advantage that: y - prob = residual", "docstring": "Make a single Newton-Raphson step.\n\nour node estimate is given by:\n\n sum(w * (y - prob)) / sum(w * prob * (1 - prob))\n\nwe take advantage that: y - prob = residual" }, { @@ -92050,7 +92050,7 @@ "docstring": { "type": "float", "default_value": "0.1", - "description": "Learning rate shrinks the contribution of each tree by\n``learning_rate``." + "description": "Learning rate shrinks the contribution of each tree by\n ``learning_rate``." 
}, "type": { "kind": "NamedType", @@ -92654,7 +92654,7 @@ "docstring": { "type": "float", "default_value": "0.1", - "description": "Learning rate shrinks the contribution of each tree by\n``learning_rate``." + "description": "Learning rate shrinks the contribution of each tree by\n ``learning_rate``." }, "type": { "kind": "NamedType", @@ -95399,11 +95399,11 @@ "docstring": { "type": "{'log_loss', 'auto', 'binary_crossentropy', 'categorical_crossentropy'}", "default_value": "'log_loss'", - "description": "The loss function to use in the boosting process.\n\nFor binary classification problems, 'log_loss' is also known as logistic loss,\nbinomial deviance or binary crossentropy. Internally, the model fits one tree\nper boosting iteration and uses the logistic sigmoid function (expit) as\ninverse link function to compute the predicted positive class probability.\n\nFor multiclass classification problems, 'log_loss' is also known as multinomial\ndeviance or categorical crossentropy. Internally, the model fits one tree per\nboosting iteration and per class and uses the softmax function as inverse link\nfunction to compute the predicted probabilities of the classes.\n\n.. deprecated:: 1.1\nThe loss arguments 'auto', 'binary_crossentropy' and\n'categorical_crossentropy' were deprecated in v1.1 and will be removed in\nversion 1.3. Use `loss='log_loss'` which is equivalent." + "description": "The loss function to use in the boosting process.\n\nFor binary classification problems, 'log_loss' is also known as logistic loss,\nbinomial deviance or binary crossentropy. Internally, the model fits one tree\nper boosting iteration and uses the logistic sigmoid function (expit) as\ninverse link function to compute the predicted positive class probability.\n\nFor multiclass classification problems, 'log_loss' is also known as multinomial\ndeviance or categorical crossentropy. Internally, the model fits one tree per\nboosting iteration and per class and uses the softmax function as inverse link\nfunction to compute the predicted probabilities of the classes.\n\n.. deprecated:: 1.1\n The loss arguments 'auto', 'binary_crossentropy' and\n 'categorical_crossentropy' were deprecated in v1.1 and will be removed in\n version 1.3. Use `loss='log_loss'` which is equivalent." }, "type": { "kind": "EnumType", - "values": ["categorical_crossentropy", "binary_crossentropy", "auto", "log_loss"] + "values": ["categorical_crossentropy", "binary_crossentropy", "log_loss", "auto"] } }, { @@ -95553,7 +95553,7 @@ "docstring": { "type": "array-like of {bool, int} of shape (n_features) or shape (n_categorical_features,)", "default_value": "None", - "description": "Indicates the categorical features.\n\n- None : no feature will be considered categorical.\n- boolean array-like : boolean mask indicating categorical features.\n- integer array-like : integer indices indicating categorical\nfeatures.\n\nFor each categorical feature, there must be at most `max_bins` unique\ncategories, and each categorical value must be in [0, max_bins -1].\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.24" + "description": "Indicates the categorical features.\n\n- None : no feature will be considered categorical.\n- boolean array-like : boolean mask indicating categorical features.\n- integer array-like : integer indices indicating categorical\n features.\n\nFor each categorical feature, there must be at most `max_bins` unique\ncategories, and each categorical value must be in [0, max_bins -1].\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24" }, "type": { "kind": "UnionType", @@ -96184,11 +96184,11 @@ "docstring": { "type": "{'squared_error', 'absolute_error', 'poisson', 'quantile'}", "default_value": "'squared_error'", - "description": "The loss function to use in the boosting process. Note that the\n\"squared error\" and \"poisson\" losses actually implement\n\"half least squares loss\" and \"half poisson deviance\" to simplify the\ncomputation of the gradient. Furthermore, \"poisson\" loss internally\nuses a log-link and requires ``y >= 0``.\n\"quantile\" uses the pinball loss.\n\n.. versionchanged:: 0.23\nAdded option 'poisson'.\n\n.. versionchanged:: 1.1\nAdded option 'quantile'.\n\n.. deprecated:: 1.0\nThe loss 'least_squares' was deprecated in v1.0 and will be removed\nin version 1.2. Use `loss='squared_error'` which is equivalent.\n\n.. deprecated:: 1.0\nThe loss 'least_absolute_deviation' was deprecated in v1.0 and will\nbe removed in version 1.2. Use `loss='absolute_error'` which is\nequivalent." + "description": "The loss function to use in the boosting process. Note that the\n\"squared error\" and \"poisson\" losses actually implement\n\"half least squares loss\" and \"half poisson deviance\" to simplify the\ncomputation of the gradient. Furthermore, \"poisson\" loss internally\nuses a log-link and requires ``y >= 0``.\n\"quantile\" uses the pinball loss.\n\n.. versionchanged:: 0.23\n Added option 'poisson'.\n\n.. versionchanged:: 1.1\n Added option 'quantile'.\n\n.. deprecated:: 1.0\n The loss 'least_squares' was deprecated in v1.0 and will be removed\n in version 1.2. Use `loss='squared_error'` which is equivalent.\n\n.. deprecated:: 1.0\n The loss 'least_absolute_deviation' was deprecated in v1.0 and will\n be removed in version 1.2. Use `loss='absolute_error'` which is\n equivalent." }, "type": { "kind": "EnumType", - "values": ["absolute_error", "poisson", "squared_error", "quantile"] + "values": ["squared_error", "quantile", "absolute_error", "poisson"] } }, { @@ -96355,7 +96355,7 @@ "docstring": { "type": "array-like of {bool, int} of shape (n_features) or shape (n_categorical_features,)", "default_value": "None", - "description": "Indicates the categorical features.\n\n- None : no feature will be considered categorical.\n- boolean array-like : boolean mask indicating categorical features.\n- integer array-like : integer indices indicating categorical\nfeatures.\n\nFor each categorical feature, there must be at most `max_bins` unique\ncategories, and each categorical value must be in [0, max_bins -1].\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24" + "description": "Indicates the categorical features.\n\n- None : no feature will be considered categorical.\n- boolean array-like : boolean mask indicating categorical features.\n- integer array-like : integer indices indicating categorical\n features.\n\nFor each categorical feature, there must be at most `max_bins` unique\ncategories, and each categorical value must be in [0, max_bins -1].\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.24" }, "type": { "kind": "UnionType", @@ -96842,7 +96842,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Update the leaf values to be predicted by the tree.\n\nUpdate equals:\nloss.fit_intercept_only(y_true - raw_prediction)\n\nThis is only applied if loss.need_update_leaves_values is True.\nNote: It only works, if the loss is a function of the residual, as is the\ncase for AbsoluteError and PinballLoss. Otherwise, one would need to get\nthe minimum of loss(y_true, raw_prediction + x) in x. A few examples:\n- AbsoluteError: median(y_true - raw_prediction).\n- PinballLoss: quantile(y_true - raw_prediction).\nSee also notes about need_update_leaves_values in BaseLoss.", + "description": "Update the leaf values to be predicted by the tree.\n\nUpdate equals:\n loss.fit_intercept_only(y_true - raw_prediction)\n\nThis is only applied if loss.need_update_leaves_values is True.\nNote: It only works, if the loss is a function of the residual, as is the\ncase for AbsoluteError and PinballLoss. Otherwise, one would need to get\nthe minimum of loss(y_true, raw_prediction + x) in x. A few examples:\n - AbsoluteError: median(y_true - raw_prediction).\n - PinballLoss: quantile(y_true - raw_prediction).\nSee also notes about need_update_leaves_values in BaseLoss.", "docstring": "Update the leaf values to be predicted by the tree.\n\nUpdate equals:\n loss.fit_intercept_only(y_true - raw_prediction)\n\nThis is only applied if loss.need_update_leaves_values is True.\nNote: It only works, if the loss is a function of the residual, as is the\ncase for AbsoluteError and PinballLoss. Otherwise, one would need to get\nthe minimum of loss(y_true, raw_prediction + x) in x. A few examples:\n - AbsoluteError: median(y_true - raw_prediction).\n - PinballLoss: quantile(y_true - raw_prediction).\nSee also notes about need_update_leaves_values in BaseLoss." }, { @@ -98447,7 +98447,7 @@ "docstring": { "type": "\"auto\", int or float", "default_value": "\"auto\"", - "description": "The number of samples to draw from X to train each base estimator.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples.\n- If \"auto\", then `max_samples=min(256, n_samples)`.\n\nIf max_samples is larger than the number of samples provided,\nall samples will be used for all trees (no sampling)." + "description": "The number of samples to draw from X to train each base estimator.\n - If int, then draw `max_samples` samples.\n - If float, then draw `max_samples * X.shape[0]` samples.\n - If \"auto\", then `max_samples=min(256, n_samples)`.\n\nIf max_samples is larger than the number of samples provided,\nall samples will be used for all trees (no sampling)." }, "type": { "kind": "UnionType", @@ -98477,7 +98477,7 @@ "docstring": { "type": "'auto' or float", "default_value": "'auto'", - "description": "The amount of contamination of the data set, i.e. the proportion\nof outliers in the data set. Used when fitting to define the threshold\non the scores of the samples.\n\n- If 'auto', the threshold is determined as in the\noriginal paper.\n- If float, the contamination should be in the range (0, 0.5].\n\n.. versionchanged:: 0.22\nThe default value of ``contamination`` changed from 0.1\nto ``'auto'``." + "description": "The amount of contamination of the data set, i.e. the proportion\nof outliers in the data set. 
Used when fitting to define the threshold\non the scores of the samples.\n\n - If 'auto', the threshold is determined as in the\n original paper.\n - If float, the contamination should be in the range (0, 0.5].\n\n.. versionchanged:: 0.22\n The default value of ``contamination`` changed from 0.1\n to ``'auto'``." }, "type": { "kind": "UnionType", @@ -98511,7 +98511,7 @@ "docstring": { "type": "int or float", "default_value": "1.0", - "description": "The number of features to draw from X to train each base estimator.\n\n- If int, then draw `max_features` features.\n- If float, then draw `max_features * X.shape[1]` features." + "description": "The number of features to draw from X to train each base estimator.\n\n - If int, then draw `max_features` features.\n - If float, then draw `max_features * X.shape[1]` features." }, "type": { "kind": "UnionType", @@ -99122,7 +99122,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "The average path length in a n_samples iTree, which is equal to\nthe average path length of an unsuccessful BST search since the\nlatter has the same structure as an isolation tree.\nParameters\n----------\nn_samples_leaf : array-like of shape (n_samples,)\nThe number of training samples in each test sample leaf, for\neach estimators.", + "description": "The average path length in a n_samples iTree, which is equal to\nthe average path length of an unsuccessful BST search since the\nlatter has the same structure as an isolation tree.\nParameters\n----------\nn_samples_leaf : array-like of shape (n_samples,)\n The number of training samples in each test sample leaf, for\n each estimators.", "docstring": "The average path length in a n_samples iTree, which is equal to\nthe average path length of an unsuccessful BST search since the\nlatter has the same structure as an isolation tree.\nParameters\n----------\nn_samples_leaf : array-like of shape (n_samples,)\n The number of training samples in each test sample leaf, for\n each estimators.\n\nReturns\n-------\naverage_path_length : ndarray of shape (n_samples,)" }, { @@ -99189,7 +99189,7 @@ "docstring": { "type": "int, cross-validation generator, iterable, or \"prefit\"", "default_value": "None", - "description": "Determines the cross-validation splitting strategy used in\n`cross_val_predict` to train `final_estimator`. Possible inputs for\ncv are:\n\n* None, to use the default 5-fold cross validation,\n* integer, to specify the number of folds in a (Stratified) KFold,\n* An object to be used as a cross-validation generator,\n* An iterable yielding train, test splits,\n* `\"prefit\"` to assume the `estimators` are prefit. In this case, the\nestimators will not be refitted.\n\nFor integer/None inputs, if the estimator is a classifier and y is\neither binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used.\nIn all other cases, :class:`~sklearn.model_selection.KFold` is used.\nThese splitters are instantiated with `shuffle=False` so the splits\nwill be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\nIf \"prefit\" is passed, it is assumed that all `estimators` have\nbeen fitted already. The `final_estimator_` is trained on the `estimators`\npredictions on the full training set and are **not** cross validated\npredictions. Please note that if the models have been trained on the same\ndata to train the stacking model, there is a very high risk of overfitting.\n\n.. 
versionadded:: 1.1\nThe 'prefit' option was added in 1.1\n\n.. note::\nA larger number of split will provide no benefits if the number\nof training samples is large enough. Indeed, the training time\nwill increase. ``cv`` is not used for model evaluation but for\nprediction." + "description": "Determines the cross-validation splitting strategy used in\n`cross_val_predict` to train `final_estimator`. Possible inputs for\ncv are:\n\n* None, to use the default 5-fold cross validation,\n* integer, to specify the number of folds in a (Stratified) KFold,\n* An object to be used as a cross-validation generator,\n* An iterable yielding train, test splits,\n* `\"prefit\"` to assume the `estimators` are prefit. In this case, the\n estimators will not be refitted.\n\nFor integer/None inputs, if the estimator is a classifier and y is\neither binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used.\nIn all other cases, :class:`~sklearn.model_selection.KFold` is used.\nThese splitters are instantiated with `shuffle=False` so the splits\nwill be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\nIf \"prefit\" is passed, it is assumed that all `estimators` have\nbeen fitted already. The `final_estimator_` is trained on the `estimators`\npredictions on the full training set and are **not** cross validated\npredictions. Please note that if the models have been trained on the same\ndata to train the stacking model, there is a very high risk of overfitting.\n\n.. versionadded:: 1.1\n The 'prefit' option was added in 1.1\n\n.. note::\n A larger number of split will provide no benefits if the number\n of training samples is large enough. Indeed, the training time\n will increase. ``cv`` is not used for model evaluation but for\n prediction." }, "type": { "kind": "UnionType", @@ -99223,11 +99223,11 @@ "docstring": { "type": "{'auto', 'predict_proba', 'decision_function', 'predict'}", "default_value": "'auto'", - "description": "Methods called for each base estimator. It can be:\n\n* if 'auto', it will try to invoke, for each estimator,\n`'predict_proba'`, `'decision_function'` or `'predict'` in that\norder.\n* otherwise, one of `'predict_proba'`, `'decision_function'` or\n`'predict'`. If the method is not implemented by the estimator, it\nwill raise an error." + "description": "Methods called for each base estimator. It can be:\n\n* if 'auto', it will try to invoke, for each estimator,\n `'predict_proba'`, `'decision_function'` or `'predict'` in that\n order.\n* otherwise, one of `'predict_proba'`, `'decision_function'` or\n `'predict'`. If the method is not implemented by the estimator, it\n will raise an error." }, "type": { "kind": "EnumType", - "values": ["predict", "auto", "predict_proba", "decision_function"] + "values": ["decision_function", "predict", "predict_proba", "auto"] } }, { @@ -99722,7 +99722,7 @@ "docstring": { "type": "int, cross-validation generator, iterable, or \"prefit\"", "default_value": "None", - "description": "Determines the cross-validation splitting strategy used in\n`cross_val_predict` to train `final_estimator`. 
Possible inputs for\ncv are:\n\n* None, to use the default 5-fold cross validation,\n* integer, to specify the number of folds in a (Stratified) KFold,\n* An object to be used as a cross-validation generator,\n* An iterable yielding train, test splits.\n* \"prefit\" to assume the `estimators` are prefit, and skip cross validation\n\nFor integer/None inputs, if the estimator is a classifier and y is\neither binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used.\nIn all other cases, :class:`~sklearn.model_selection.KFold` is used.\nThese splitters are instantiated with `shuffle=False` so the splits\nwill be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\nIf \"prefit\" is passed, it is assumed that all `estimators` have\nbeen fitted already. The `final_estimator_` is trained on the `estimators`\npredictions on the full training set and are **not** cross validated\npredictions. Please note that if the models have been trained on the same\ndata to train the stacking model, there is a very high risk of overfitting.\n\n.. versionadded:: 1.1\nThe 'prefit' option was added in 1.1\n\n.. note::\nA larger number of split will provide no benefits if the number\nof training samples is large enough. Indeed, the training time\nwill increase. ``cv`` is not used for model evaluation but for\nprediction." + "description": "Determines the cross-validation splitting strategy used in\n`cross_val_predict` to train `final_estimator`. Possible inputs for\ncv are:\n\n* None, to use the default 5-fold cross validation,\n* integer, to specify the number of folds in a (Stratified) KFold,\n* An object to be used as a cross-validation generator,\n* An iterable yielding train, test splits.\n* \"prefit\" to assume the `estimators` are prefit, and skip cross validation\n\nFor integer/None inputs, if the estimator is a classifier and y is\neither binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used.\nIn all other cases, :class:`~sklearn.model_selection.KFold` is used.\nThese splitters are instantiated with `shuffle=False` so the splits\nwill be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\nIf \"prefit\" is passed, it is assumed that all `estimators` have\nbeen fitted already. The `final_estimator_` is trained on the `estimators`\npredictions on the full training set and are **not** cross validated\npredictions. Please note that if the models have been trained on the same\ndata to train the stacking model, there is a very high risk of overfitting.\n\n.. versionadded:: 1.1\n The 'prefit' option was added in 1.1\n\n.. note::\n A larger number of split will provide no benefits if the number\n of training samples is large enough. Indeed, the training time\n will increase. ``cv`` is not used for model evaluation but for\n prediction." }, "type": { "kind": "UnionType", @@ -100429,7 +100429,7 @@ "docstring": { "type": "array-like of shape (n_samples,) or default=None", "default_value": "", - "description": "Sample weights. If None, then samples are equally weighted.\nNote that this is supported only if all underlying estimators\nsupport sample weights.\n\n.. versionchanged:: 0.23\nwhen not None, `sample_weight` is passed to all underlying\nestimators" + "description": "Sample weights. If None, then samples are equally weighted.\nNote that this is supported only if all underlying estimators\nsupport sample weights.\n\n.. 
versionchanged:: 0.23\n when not None, `sample_weight` is passed to all underlying\n estimators" }, "type": { "kind": "NamedType", @@ -100473,7 +100473,7 @@ "docstring": { "type": "array-like of str or None", "default_value": "None", - "description": "Input features. The input feature names are only used when `passthrough` is\n`True`.\n\n- If `input_features` is `None`, then `feature_names_in_` is\nused as feature names in. If `feature_names_in_` is not defined,\nthen names are generated: `[x0, x1, ..., x(n_features_in_ - 1)]`.\n- If `input_features` is an array-like, then `input_features` must\nmatch `feature_names_in_` if `feature_names_in_` is defined.\n\nIf `passthrough` is `False`, then only the names of `estimators` are used\nto generate the output feature names." + "description": "Input features. The input feature names are only used when `passthrough` is\n`True`.\n\n- If `input_features` is `None`, then `feature_names_in_` is\n used as feature names in. If `feature_names_in_` is not defined,\n then names are generated: `[x0, x1, ..., x(n_features_in_ - 1)]`.\n- If `input_features` is an array-like, then `input_features` must\n match `feature_names_in_` if `feature_names_in_` is defined.\n\nIf `passthrough` is `False`, then only the names of `estimators` are used\nto generate the output feature names." }, "type": { "kind": "UnionType", @@ -100650,7 +100650,7 @@ "docstring": { "type": "list of (str, estimator) tuples", "default_value": "", - "description": "Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones\nof those original estimators that will be stored in the class attribute\n``self.estimators_``. An estimator can be set to ``'drop'`` using\n:meth:`set_params`.\n\n.. versionchanged:: 0.21\n``'drop'`` is accepted. Using None was deprecated in 0.22 and\nsupport was removed in 0.24." + "description": "Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones\nof those original estimators that will be stored in the class attribute\n``self.estimators_``. An estimator can be set to ``'drop'`` using\n:meth:`set_params`.\n\n.. versionchanged:: 0.21\n ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n support was removed in 0.24." }, "type": { "kind": "NamedType", @@ -101146,7 +101146,7 @@ "docstring": { "type": "list of (str, estimator) tuples", "default_value": "", - "description": "Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones\nof those original estimators that will be stored in the class attribute\n``self.estimators_``. An estimator can be set to ``'drop'`` using\n:meth:`set_params`.\n\n.. versionchanged:: 0.21\n``'drop'`` is accepted. Using None was deprecated in 0.22 and\nsupport was removed in 0.24." + "description": "Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones\nof those original estimators that will be stored in the class attribute\n``self.estimators_``. An estimator can be set to ``'drop'`` using\n:meth:`set_params`.\n\n.. versionchanged:: 0.21\n ``'drop'`` is accepted. Using None was deprecated in 0.22 and\n support was removed in 0.24." }, "type": { "kind": "NamedType", @@ -101916,7 +101916,7 @@ }, "type": { "kind": "EnumType", - "values": ["SAMME", "SAMME.R"] + "values": ["SAMME.R", "SAMME"] } }, { @@ -102309,7 +102309,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Compute probabilities from the decision function.\n\nThis is based eq. 
(4) of [1] where:\np(y=c|X) = exp((1 / K-1) f_c(X)) / sum_k(exp((1 / K-1) f_k(X)))\n= softmax((1 / K-1) * f(X))", + "description": "Compute probabilities from the decision function.\n\nThis is based eq. (4) of [1] where:\n p(y=c|X) = exp((1 / K-1) f_c(X)) / sum_k(exp((1 / K-1) f_k(X)))\n = softmax((1 / K-1) * f(X))", "docstring": "Compute probabilities from the decision function.\n\nThis is based eq. (4) of [1] where:\n p(y=c|X) = exp((1 / K-1) f_c(X)) / sum_k(exp((1 / K-1) f_k(X)))\n = softmax((1 / K-1) * f(X))\n\nReferences\n----------\n.. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, \"Multi-class AdaBoost\",\n 2009." }, { @@ -102873,7 +102873,7 @@ }, "type": { "kind": "EnumType", - "values": ["exponential", "square", "linear"] + "values": ["square", "linear", "exponential"] } }, { @@ -103999,7 +103999,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "(INTERNAL) Decodes an attribute line.\n\nThe attribute is the most complex declaration in an arff file. All\nattributes must follow the template::\n\n@attribute \n\nwhere ``attribute-name`` is a string, quoted if the name contains any\nwhitespace, and ``datatype`` can be:\n\n- Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n- Strings as ``STRING``.\n- Dates (NOT IMPLEMENTED).\n- Nominal attributes with format:\n\n{, , , ...}\n\nThe nominal names follow the rules for the attribute names, i.e., they\nmust be quoted if the name contains whitespaces.\n\nThis method must receive a normalized string, i.e., a string without\npadding, including the \"\n\" characters.\n\n:param s: a normalized string.\n:return: a tuple (ATTRIBUTE_NAME, TYPE_OR_VALUES).", + "description": "(INTERNAL) Decodes an attribute line.\n\n The attribute is the most complex declaration in an arff file. All\n attributes must follow the template::\n\n @attribute \n\n where ``attribute-name`` is a string, quoted if the name contains any\n whitespace, and ``datatype`` can be:\n\n - Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n - Strings as ``STRING``.\n - Dates (NOT IMPLEMENTED).\n - Nominal attributes with format:\n\n {, , , ...}\n\n The nominal names follow the rules for the attribute names, i.e., they\n must be quoted if the name contains whitespaces.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\n\" characters.\n\n :param s: a normalized string.\n :return: a tuple (ATTRIBUTE_NAME, TYPE_OR_VALUES).", "docstring": "(INTERNAL) Decodes an attribute line.\n\n The attribute is the most complex declaration in an arff file. 
All\n attributes must follow the template::\n\n @attribute \n\n where ``attribute-name`` is a string, quoted if the name contains any\n whitespace, and ``datatype`` can be:\n\n - Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n - Strings as ``STRING``.\n - Dates (NOT IMPLEMENTED).\n - Nominal attributes with format:\n\n {, , , ...}\n\n The nominal names follow the rules for the attribute names, i.e., they\n must be quoted if the name contains whitespaces.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\r\n\" characters.\n\n :param s: a normalized string.\n :return: a tuple (ATTRIBUTE_NAME, TYPE_OR_VALUES).\n " }, { @@ -104040,7 +104040,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "(INTERNAL) Decodes a comment line.\n\nComments are single line strings starting, obligatorily, with the ``%``\ncharacter, and can have any symbol, including whitespaces or special\ncharacters.\n\nThis method must receive a normalized string, i.e., a string without\npadding, including the \"\n\" characters.\n\n:param s: a normalized string.\n:return: a string with the decoded comment.", + "description": "(INTERNAL) Decodes a comment line.\n\n Comments are single line strings starting, obligatorily, with the ``%``\n character, and can have any symbol, including whitespaces or special\n characters.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\n\" characters.\n\n :param s: a normalized string.\n :return: a string with the decoded comment.", "docstring": "(INTERNAL) Decodes a comment line.\n\n Comments are single line strings starting, obligatorily, with the ``%``\n character, and can have any symbol, including whitespaces or special\n characters.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\r\n\" characters.\n\n :param s: a normalized string.\n :return: a string with the decoded comment.\n " }, { @@ -104081,7 +104081,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "(INTERNAL) Decodes a relation line.\n\nThe relation declaration is a line with the format ``@RELATION\n``, where ``relation-name`` is a string. The string must\nstart with alphabetic character and must be quoted if the name includes\nspaces, otherwise this method will raise a `BadRelationFormat` exception.\n\nThis method must receive a normalized string, i.e., a string without\npadding, including the \"\n\" characters.\n\n:param s: a normalized string.\n:return: a string with the decoded relation name.", + "description": "(INTERNAL) Decodes a relation line.\n\n The relation declaration is a line with the format ``@RELATION\n ``, where ``relation-name`` is a string. The string must\n start with alphabetic character and must be quoted if the name includes\n spaces, otherwise this method will raise a `BadRelationFormat` exception.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\n\" characters.\n\n :param s: a normalized string.\n :return: a string with the decoded relation name.", "docstring": "(INTERNAL) Decodes a relation line.\n\n The relation declaration is a line with the format ``@RELATION\n ``, where ``relation-name`` is a string. 
The string must\n start with alphabetic character and must be quoted if the name includes\n spaces, otherwise this method will raise a `BadRelationFormat` exception.\n\n This method must receive a normalized string, i.e., a string without\n padding, including the \"\r\n\" characters.\n\n :param s: a normalized string.\n :return: a string with the decoded relation name.\n " }, { @@ -104150,7 +104150,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Returns the Python representation of a given ARFF file.\n\nWhen a file object is passed as an argument, this method reads lines\niteratively, avoiding to load unnecessary information to the memory.\n\n:param s: a string or file object with the ARFF file.\n:param encode_nominal: boolean, if True perform a label encoding\nwhile reading the .arff file.\n:param return_type: determines the data structure used to store the\ndataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n`arff.DENSE_GEN` or `arff.LOD_GEN`.\nConsult the sections on `working with sparse data`_ and `loading\nprogressively`_.", + "description": "Returns the Python representation of a given ARFF file.\n\nWhen a file object is passed as an argument, this method reads lines\niteratively, avoiding to load unnecessary information to the memory.\n\n:param s: a string or file object with the ARFF file.\n:param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n:param return_type: determines the data structure used to store the\n dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_.", "docstring": "Returns the Python representation of a given ARFF file.\n\nWhen a file object is passed as an argument, this method reads lines\niteratively, avoiding to load unnecessary information to the memory.\n\n:param s: a string or file object with the ARFF file.\n:param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n:param return_type: determines the data structure used to store the\n dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_." 
}, { @@ -104205,7 +104205,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "(INTERNAL) Encodes an attribute line.\n\nThe attribute follow the template::\n\n@attribute \n\nwhere ``attribute-name`` is a string, and ``datatype`` can be:\n\n- Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n- Strings as ``STRING``.\n- Dates (NOT IMPLEMENTED).\n- Nominal attributes with format:\n\n{, , , ...}\n\nThis method must receive a the name of the attribute and its type, if\nthe attribute type is nominal, ``type`` must be a list of values.\n\n:param name: a string.\n:param type_: a string or a list of string.\n:return: a string with the encoded attribute declaration.", + "description": "(INTERNAL) Encodes an attribute line.\n\nThe attribute follow the template::\n\n @attribute \n\nwhere ``attribute-name`` is a string, and ``datatype`` can be:\n\n- Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n- Strings as ``STRING``.\n- Dates (NOT IMPLEMENTED).\n- Nominal attributes with format:\n\n {, , , ...}\n\nThis method must receive a the name of the attribute and its type, if\nthe attribute type is nominal, ``type`` must be a list of values.\n\n:param name: a string.\n:param type_: a string or a list of string.\n:return: a string with the encoded attribute declaration.", "docstring": "(INTERNAL) Encodes an attribute line.\n\nThe attribute follow the template::\n\n @attribute \n\nwhere ``attribute-name`` is a string, and ``datatype`` can be:\n\n- Numerical attributes as ``NUMERIC``, ``INTEGER`` or ``REAL``.\n- Strings as ``STRING``.\n- Dates (NOT IMPLEMENTED).\n- Nominal attributes with format:\n\n {, , , ...}\n\nThis method must receive a the name of the attribute and its type, if\nthe attribute type is nominal, ``type`` must be a list of values.\n\n:param name: a string.\n:param type_: a string or a list of string.\n:return: a string with the encoded attribute declaration." }, { @@ -105624,7 +105624,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Load a file-like object containing the ARFF document and convert it into\na Python object.\n\n:param fp: a file-like object.\n:param encode_nominal: boolean, if True perform a label encoding\nwhile reading the .arff file.\n:param return_type: determines the data structure used to store the\ndataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n`arff.DENSE_GEN` or `arff.LOD_GEN`.\nConsult the sections on `working with sparse data`_ and `loading\nprogressively`_.\n:return: a dictionary.", + "description": "Load a file-like object containing the ARFF document and convert it into\na Python object.\n\n:param fp: a file-like object.\n:param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n:param return_type: determines the data structure used to store the\n dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_.\n:return: a dictionary.", "docstring": "Load a file-like object containing the ARFF document and convert it into\na Python object.\n\n:param fp: a file-like object.\n:param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n:param return_type: determines the data structure used to store the\n dataset. 
Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_.\n:return: a dictionary.\n " }, { @@ -105679,7 +105679,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Convert a string instance containing the ARFF document into a Python\nobject.\n\n:param s: a string object.\n:param encode_nominal: boolean, if True perform a label encoding\nwhile reading the .arff file.\n:param return_type: determines the data structure used to store the\ndataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n`arff.DENSE_GEN` or `arff.LOD_GEN`.\nConsult the sections on `working with sparse data`_ and `loading\nprogressively`_.\n:return: a dictionary.", + "description": "Convert a string instance containing the ARFF document into a Python\nobject.\n\n:param s: a string object.\n:param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n:param return_type: determines the data structure used to store the\n dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_.\n:return: a dictionary.", "docstring": "Convert a string instance containing the ARFF document into a Python\nobject.\n\n:param s: a string object.\n:param encode_nominal: boolean, if True perform a label encoding\n while reading the .arff file.\n:param return_type: determines the data structure used to store the\n dataset. Can be one of `arff.DENSE`, `arff.COO`, `arff.LOD`,\n `arff.DENSE_GEN` or `arff.LOD_GEN`.\n Consult the sections on `working with sparse data`_ and `loading\n progressively`_.\n:return: a dictionary." }, { @@ -108773,7 +108773,7 @@ "docstring": { "type": "Mapping or iterable over Mappings", "default_value": "", - "description": "Dict(s) or Mapping(s) from feature names (arbitrary Python\nobjects) to feature values (strings or convertible to dtype).\n\n.. versionchanged:: 0.24\nAccepts multiple string values for one categorical feature." + "description": "Dict(s) or Mapping(s) from feature names (arbitrary Python\nobjects) to feature values (strings or convertible to dtype).\n\n.. versionchanged:: 0.24\n Accepts multiple string values for one categorical feature." }, "type": { "kind": "UnionType", @@ -108843,7 +108843,7 @@ "docstring": { "type": "Mapping or iterable over Mappings", "default_value": "", - "description": "Dict(s) or Mapping(s) from feature names (arbitrary Python\nobjects) to feature values (strings or convertible to dtype).\n\n.. versionchanged:: 0.24\nAccepts multiple string values for one categorical feature." + "description": "Dict(s) or Mapping(s) from feature names (arbitrary Python\nobjects) to feature values (strings or convertible to dtype).\n\n.. versionchanged:: 0.24\n Accepts multiple string values for one categorical feature." }, "type": { "kind": "UnionType", @@ -109257,7 +109257,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "When True, an alternating sign is added to the features as to\napproximately conserve the inner product in the hashed space even for\nsmall n_features. This approach is similar to sparse random projection.\n\n.. versionchanged:: 0.19\n``alternate_sign`` replaces the now deprecated ``non_negative``\nparameter." 
+ "description": "When True, an alternating sign is added to the features as to\napproximately conserve the inner product in the hashed space even for\nsmall n_features. This approach is similar to sparse random projection.\n\n.. versionchanged:: 0.19\n ``alternate_sign`` replaces the now deprecated ``non_negative``\n parameter." }, "type": { "kind": "NamedType", @@ -110617,11 +110617,11 @@ "docstring": { "type": "{'filename', 'file', 'content'}", "default_value": "'content'", - "description": "- If `'filename'`, the sequence passed as an argument to fit is\nexpected to be a list of filenames that need reading to fetch\nthe raw content to analyze.\n\n- If `'file'`, the sequence items must have a 'read' method (file-like\nobject) that is called to fetch the bytes in memory.\n\n- If `'content'`, the input is expected to be a sequence of items that\ncan be of type string or byte." + "description": "- If `'filename'`, the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n- If `'file'`, the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n- If `'content'`, the input is expected to be a sequence of items that\n can be of type string or byte." }, "type": { "kind": "EnumType", - "values": ["filename", "file", "content"] + "values": ["filename", "content", "file"] } }, { @@ -110655,7 +110655,7 @@ }, "type": { "kind": "EnumType", - "values": ["strict", "replace", "ignore"] + "values": ["strict", "ignore", "replace"] } }, { @@ -110803,7 +110803,7 @@ "types": [ { "kind": "EnumType", - "values": ["char", "char_wb", "word"] + "values": ["word", "char_wb", "char"] }, { "kind": "NamedType", @@ -111548,11 +111548,11 @@ "docstring": { "type": "{'filename', 'file', 'content'}", "default_value": "'content'", - "description": "- If `'filename'`, the sequence passed as an argument to fit is\nexpected to be a list of filenames that need reading to fetch\nthe raw content to analyze.\n\n- If `'file'`, the sequence items must have a 'read' method (file-like\nobject) that is called to fetch the bytes in memory.\n\n- If `'content'`, the input is expected to be a sequence of items that\ncan be of type string or byte." + "description": "- If `'filename'`, the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n- If `'file'`, the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n- If `'content'`, the input is expected to be a sequence of items that\n can be of type string or byte." }, "type": { "kind": "EnumType", - "values": ["filename", "file", "content"] + "values": ["filename", "content", "file"] } }, { @@ -111586,7 +111586,7 @@ }, "type": { "kind": "EnumType", - "values": ["strict", "replace", "ignore"] + "values": ["strict", "ignore", "replace"] } }, { @@ -111727,14 +111727,14 @@ "docstring": { "type": "{'word', 'char', 'char_wb'} or callable", "default_value": "'word'", - "description": "Whether the feature should be made of word or character n-grams.\nOption 'char_wb' creates character n-grams only from text inside\nword boundaries; n-grams at the edges of words are padded with space.\n\nIf a callable is passed it is used to extract the sequence of features\nout of the raw, unprocessed input.\n\n.. 
versionchanged:: 0.21\nSince v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\nis first read from the file and then passed to the given callable\nanalyzer." + "description": "Whether the feature should be made of word or character n-grams.\nOption 'char_wb' creates character n-grams only from text inside\nword boundaries; n-grams at the edges of words are padded with space.\n\nIf a callable is passed it is used to extract the sequence of features\nout of the raw, unprocessed input.\n\n.. versionchanged:: 0.21\n Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n is first read from the file and then passed to the given callable\n analyzer." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["char", "char_wb", "word"] + "values": ["word", "char_wb", "char"] }, { "kind": "NamedType", @@ -111791,7 +111791,7 @@ }, "type": { "kind": "EnumType", - "values": ["l2", "l1"] + "values": ["l1", "l2"] } }, { @@ -111832,7 +111832,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Convert a collection of text documents to a matrix of token occurrences.\n\nIt turns a collection of text documents into a scipy.sparse matrix holding\ntoken occurrence counts (or binary occurrence information), possibly\nnormalized as token frequencies if norm='l1' or projected on the euclidean\nunit sphere if norm='l2'.\n\nThis text vectorizer implementation uses the hashing trick to find the\ntoken string name to feature integer index mapping.\n\nThis strategy has several advantages:\n\n- it is very low memory scalable to large datasets as there is no need to\nstore a vocabulary dictionary in memory.\n\n- it is fast to pickle and un-pickle as it holds no state besides the\nconstructor parameters.\n\n- it can be used in a streaming (partial fit) or parallel pipeline as there\nis no state computed during fit.\n\nThere are also a couple of cons (vs using a CountVectorizer with an\nin-memory vocabulary):\n\n- there is no way to compute the inverse transform (from feature indices to\nstring feature names) which can be a problem when trying to introspect\nwhich features are most important to a model.\n\n- there can be collisions: distinct tokens can be mapped to the same\nfeature index. However in practice this is rarely an issue if n_features\nis large enough (e.g. 
2 ** 18 for text classification problems).\n\n- no IDF weighting as this would render the transformer stateful.\n\nThe hash function employed is the signed 32-bit version of Murmurhash3.\n\nRead more in the :ref:`User Guide `.", + "description": "Convert a collection of text documents to a matrix of token occurrences.\n\nIt turns a collection of text documents into a scipy.sparse matrix holding\ntoken occurrence counts (or binary occurrence information), possibly\nnormalized as token frequencies if norm='l1' or projected on the euclidean\nunit sphere if norm='l2'.\n\nThis text vectorizer implementation uses the hashing trick to find the\ntoken string name to feature integer index mapping.\n\nThis strategy has several advantages:\n\n- it is very low memory scalable to large datasets as there is no need to\n store a vocabulary dictionary in memory.\n\n- it is fast to pickle and un-pickle as it holds no state besides the\n constructor parameters.\n\n- it can be used in a streaming (partial fit) or parallel pipeline as there\n is no state computed during fit.\n\nThere are also a couple of cons (vs using a CountVectorizer with an\nin-memory vocabulary):\n\n- there is no way to compute the inverse transform (from feature indices to\n string feature names) which can be a problem when trying to introspect\n which features are most important to a model.\n\n- there can be collisions: distinct tokens can be mapped to the same\n feature index. However in practice this is rarely an issue if n_features\n is large enough (e.g. 2 ** 18 for text classification problems).\n\n- no IDF weighting as this would render the transformer stateful.\n\nThe hash function employed is the signed 32-bit version of Murmurhash3.\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -112182,11 +112182,11 @@ "docstring": { "type": "{'l1', 'l2'}", "default_value": "'l2'", - "description": "Each output row will have unit norm, either:\n\n- 'l2': Sum of squares of vector elements is 1. The cosine\nsimilarity between two vectors is their dot product when l2 norm has\nbeen applied.\n- 'l1': Sum of absolute values of vector elements is 1.\nSee :func:`preprocessing.normalize`." + "description": "Each output row will have unit norm, either:\n\n- 'l2': Sum of squares of vector elements is 1. The cosine\n similarity between two vectors is their dot product when l2 norm has\n been applied.\n- 'l1': Sum of absolute values of vector elements is 1.\n See :func:`preprocessing.normalize`." }, "type": { "kind": "EnumType", - "values": ["l2", "l1"] + "values": ["l1", "l2"] } }, { @@ -112503,11 +112503,11 @@ "docstring": { "type": "{'filename', 'file', 'content'}", "default_value": "'content'", - "description": "- If `'filename'`, the sequence passed as an argument to fit is\nexpected to be a list of filenames that need reading to fetch\nthe raw content to analyze.\n\n- If `'file'`, the sequence items must have a 'read' method (file-like\nobject) that is called to fetch the bytes in memory.\n\n- If `'content'`, the input is expected to be a sequence of items that\ncan be of type string or byte." + "description": "- If `'filename'`, the sequence passed as an argument to fit is\n expected to be a list of filenames that need reading to fetch\n the raw content to analyze.\n\n- If `'file'`, the sequence items must have a 'read' method (file-like\n object) that is called to fetch the bytes in memory.\n\n- If `'content'`, the input is expected to be a sequence of items that\n can be of type string or byte." 
}, "type": { "kind": "EnumType", - "values": ["filename", "file", "content"] + "values": ["filename", "content", "file"] } }, { @@ -112541,7 +112541,7 @@ }, "type": { "kind": "EnumType", - "values": ["strict", "replace", "ignore"] + "values": ["strict", "ignore", "replace"] } }, { @@ -112622,14 +112622,14 @@ "docstring": { "type": "{'word', 'char', 'char_wb'} or callable", "default_value": "'word'", - "description": "Whether the feature should be made of word or character n-grams.\nOption 'char_wb' creates character n-grams only from text inside\nword boundaries; n-grams at the edges of words are padded with space.\n\nIf a callable is passed it is used to extract the sequence of features\nout of the raw, unprocessed input.\n\n.. versionchanged:: 0.21\nSince v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\nis first read from the file and then passed to the given callable\nanalyzer." + "description": "Whether the feature should be made of word or character n-grams.\nOption 'char_wb' creates character n-grams only from text inside\nword boundaries; n-grams at the edges of words are padded with space.\n\nIf a callable is passed it is used to extract the sequence of features\nout of the raw, unprocessed input.\n\n.. versionchanged:: 0.21\n Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\n is first read from the file and then passed to the given callable\n analyzer." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["char", "char_wb", "word"] + "values": ["word", "char_wb", "char"] }, { "kind": "NamedType", @@ -112853,11 +112853,11 @@ "docstring": { "type": "{'l1', 'l2'}", "default_value": "'l2'", - "description": "Each output row will have unit norm, either:\n\n- 'l2': Sum of squares of vector elements is 1. The cosine\nsimilarity between two vectors is their dot product when l2 norm has\nbeen applied.\n- 'l1': Sum of absolute values of vector elements is 1.\nSee :func:`preprocessing.normalize`." + "description": "Each output row will have unit norm, either:\n\n- 'l2': Sum of squares of vector elements is 1. The cosine\n similarity between two vectors is their dot product when l2 norm has\n been applied.\n- 'l1': Sum of absolute values of vector elements is 1.\n See :func:`preprocessing.normalize`." }, "type": { "kind": "EnumType", - "values": ["l2", "l1"] + "values": ["l1", "l2"] } }, { @@ -114101,7 +114101,7 @@ "docstring": { "type": "array-like of str or None", "default_value": "None", - "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\nused as feature names in. If `feature_names_in_` is not defined,\nthen the following input feature names are generated:\n`[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\nmatch `feature_names_in_` if `feature_names_in_` is defined." + "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\n used as feature names in. If `feature_names_in_` is not defined,\n then the following input feature names are generated:\n `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\n match `feature_names_in_` if `feature_names_in_` is defined." 
}, "type": { "kind": "UnionType", @@ -114488,7 +114488,7 @@ "docstring": { "type": "int, callable", "default_value": "None", - "description": "The maximum number of features to select.\n\n- If an integer, then it specifies the maximum number of features to\nallow.\n- If a callable, then it specifies how to calculate the maximum number of\nfeatures allowed by using the output of `max_feaures(X)`.\n- If `None`, then all features are kept.\n\nTo only select based on ``max_features``, set ``threshold=-np.inf``.\n\n.. versionadded:: 0.20\n.. versionchanged:: 1.1\n`max_features` accepts a callable." + "description": "The maximum number of features to select.\n\n- If an integer, then it specifies the maximum number of features to\n allow.\n- If a callable, then it specifies how to calculate the maximum number of\n features allowed by using the output of `max_feaures(X)`.\n- If `None`, then all features are kept.\n\nTo only select based on ``max_features``, set ``threshold=-np.inf``.\n\n.. versionadded:: 0.20\n.. versionchanged:: 1.1\n `max_features` accepts a callable." }, "type": { "kind": "UnionType", @@ -115727,7 +115727,7 @@ "docstring": { "type": "int or float", "default_value": "None", - "description": "The number of features to select. If `None`, half of the features are\nselected. If integer, the parameter is the absolute number of features\nto select. If float between 0 and 1, it is the fraction of features to\nselect.\n\n.. versionchanged:: 0.24\nAdded float values for fractions." + "description": "The number of features to select. If `None`, half of the features are\nselected. If integer, the parameter is the absolute number of features\nto select. If float between 0 and 1, it is the fraction of features to\nselect.\n\n.. versionchanged:: 0.24\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -116486,7 +116486,7 @@ "docstring": { "type": "int, cross-validation generator or an iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if ``y`` is binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used. If the\nestimator is a classifier or if ``y`` is neither binary nor multiclass,\n:class:`~sklearn.model_selection.KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value of None changed from 3-fold to 5-fold." + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if ``y`` is binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used. If the\nestimator is a classifier or if ``y`` is neither binary nor multiclass,\n:class:`~sklearn.model_selection.KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value of None changed from 3-fold to 5-fold." 
}, "type": { "kind": "UnionType", @@ -116923,7 +116923,7 @@ "docstring": { "type": "\"auto\", int or float", "default_value": "'warn'", - "description": "If `\"auto\"`, the behaviour depends on the `tol` parameter:\n\n- if `tol` is not `None`, then features are selected until the score\nimprovement does not exceed `tol`.\n- otherwise, half of the features are selected.\n\nIf integer, the parameter is the absolute number of features to select.\nIf float between 0 and 1, it is the fraction of features to select.\n\n.. versionadded:: 1.1\nThe option `\"auto\"` was added in version 1.1.\n\n.. deprecated:: 1.1\nThe default changed from `None` to `\"warn\"` in 1.1 and will become\n`\"auto\"` in 1.3. `None` and `'warn'` will be removed in 1.3.\nTo keep the same behaviour as `None`, set\n`n_features_to_select=\"auto\" and `tol=None`." + "description": "If `\"auto\"`, the behaviour depends on the `tol` parameter:\n\n- if `tol` is not `None`, then features are selected until the score\n improvement does not exceed `tol`.\n- otherwise, half of the features are selected.\n\nIf integer, the parameter is the absolute number of features to select.\nIf float between 0 and 1, it is the fraction of features to select.\n\n.. versionadded:: 1.1\n The option `\"auto\"` was added in version 1.1.\n\n.. deprecated:: 1.1\n The default changed from `None` to `\"warn\"` in 1.1 and will become\n `\"auto\"` in 1.3. `None` and `'warn'` will be removed in 1.3.\n To keep the same behaviour as `None`, set\n `n_features_to_select=\"auto\" and `tol=None`." }, "type": { "kind": "UnionType", @@ -116974,7 +116974,7 @@ }, "type": { "kind": "EnumType", - "values": ["forward", "backward"] + "values": ["backward", "forward"] } }, { @@ -117314,7 +117314,7 @@ }, "type": { "kind": "EnumType", - "values": ["fpr", "k_best", "fwe", "percentile", "fdr"] + "values": ["fdr", "fwe", "percentile", "k_best", "fpr"] } }, { @@ -118518,7 +118518,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "Whether or not to force the F-statistics and associated p-values to\nbe finite. There are two cases where the F-statistic is expected to not\nbe finite:\n\n- when the target `y` or some features in `X` are constant. In this\ncase, the Pearson's R correlation is not defined leading to obtain\n`np.nan` values in the F-statistic and p-value. When\n`force_finite=True`, the F-statistic is set to `0.0` and the\nassociated p-value is set to `1.0`.\n- when the a feature in `X` is perfectly correlated (or\nanti-correlated) with the target `y`. In this case, the F-statistic\nis expected to be `np.inf`. When `force_finite=True`, the F-statistic\nis set to `np.finfo(dtype).max` and the associated p-value is set to\n`0.0`.\n\n.. versionadded:: 1.1" + "description": "Whether or not to force the F-statistics and associated p-values to\nbe finite. There are two cases where the F-statistic is expected to not\nbe finite:\n\n- when the target `y` or some features in `X` are constant. In this\n case, the Pearson's R correlation is not defined leading to obtain\n `np.nan` values in the F-statistic and p-value. When\n `force_finite=True`, the F-statistic is set to `0.0` and the\n associated p-value is set to `1.0`.\n- when the a feature in `X` is perfectly correlated (or\n anti-correlated) with the target `y`. In this case, the F-statistic\n is expected to be `np.inf`. When `force_finite=True`, the F-statistic\n is set to `np.finfo(dtype).max` and the associated p-value is set to\n `0.0`.\n\n.. 
versionadded:: 1.1" }, "type": { "kind": "NamedType", @@ -118529,7 +118529,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.feature_selection"], - "description": "Univariate linear regression tests returning F-statistic and p-values.\n\nQuick linear model for testing the effect of a single regressor,\nsequentially for many regressors.\n\nThis is done in 2 steps:\n\n1. The cross correlation between each regressor and the target is computed\nusing :func:`r_regression` as::\n\nE[(X[:, i] - mean(X[:, i])) * (y - mean(y))] / (std(X[:, i]) * std(y))\n\n2. It is converted to an F score and then to a p-value.\n\n:func:`f_regression` is derived from :func:`r_regression` and will rank\nfeatures in the same order if all the features are positively correlated\nwith the target.\n\nNote however that contrary to :func:`f_regression`, :func:`r_regression`\nvalues lie in [-1, 1] and can thus be negative. :func:`f_regression` is\ntherefore recommended as a feature selection criterion to identify\npotentially predictive feature for a downstream classifier, irrespective of\nthe sign of the association with the target variable.\n\nFurthermore :func:`f_regression` returns p-values while\n:func:`r_regression` does not.\n\nRead more in the :ref:`User Guide `.", + "description": "Univariate linear regression tests returning F-statistic and p-values.\n\nQuick linear model for testing the effect of a single regressor,\nsequentially for many regressors.\n\nThis is done in 2 steps:\n\n1. The cross correlation between each regressor and the target is computed\n using :func:`r_regression` as::\n\n E[(X[:, i] - mean(X[:, i])) * (y - mean(y))] / (std(X[:, i]) * std(y))\n\n2. It is converted to an F score and then to a p-value.\n\n:func:`f_regression` is derived from :func:`r_regression` and will rank\nfeatures in the same order if all the features are positively correlated\nwith the target.\n\nNote however that contrary to :func:`f_regression`, :func:`r_regression`\nvalues lie in [-1, 1] and can thus be negative. :func:`f_regression` is\ntherefore recommended as a feature selection criterion to identify\npotentially predictive feature for a downstream classifier, irrespective of\nthe sign of the association with the target variable.\n\nFurthermore :func:`f_regression` returns p-values while\n:func:`r_regression` does not.\n\nRead more in the :ref:`User Guide `.", "docstring": "Univariate linear regression tests returning F-statistic and p-values.\n\nQuick linear model for testing the effect of a single regressor,\nsequentially for many regressors.\n\nThis is done in 2 steps:\n\n1. The cross correlation between each regressor and the target is computed\n using :func:`r_regression` as::\n\n E[(X[:, i] - mean(X[:, i])) * (y - mean(y))] / (std(X[:, i]) * std(y))\n\n2. It is converted to an F score and then to a p-value.\n\n:func:`f_regression` is derived from :func:`r_regression` and will rank\nfeatures in the same order if all the features are positively correlated\nwith the target.\n\nNote however that contrary to :func:`f_regression`, :func:`r_regression`\nvalues lie in [-1, 1] and can thus be negative. 
:func:`f_regression` is\ntherefore recommended as a feature selection criterion to identify\npotentially predictive feature for a downstream classifier, irrespective of\nthe sign of the association with the target variable.\n\nFurthermore :func:`f_regression` returns p-values while\n:func:`r_regression` does not.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data matrix.\n\ny : array-like of shape (n_samples,)\n The target vector.\n\ncenter : bool, default=True\n Whether or not to center the data matrix `X` and the target vector `y`.\n By default, `X` and `y` will be centered.\n\nforce_finite : bool, default=True\n Whether or not to force the F-statistics and associated p-values to\n be finite. There are two cases where the F-statistic is expected to not\n be finite:\n\n - when the target `y` or some features in `X` are constant. In this\n case, the Pearson's R correlation is not defined leading to obtain\n `np.nan` values in the F-statistic and p-value. When\n `force_finite=True`, the F-statistic is set to `0.0` and the\n associated p-value is set to `1.0`.\n - when the a feature in `X` is perfectly correlated (or\n anti-correlated) with the target `y`. In this case, the F-statistic\n is expected to be `np.inf`. When `force_finite=True`, the F-statistic\n is set to `np.finfo(dtype).max` and the associated p-value is set to\n `0.0`.\n\n .. versionadded:: 1.1\n\nReturns\n-------\nf_statistic : ndarray of shape (n_features,)\n F-statistic for each feature.\n\np_values : ndarray of shape (n_features,)\n P-values associated with the F-statistic.\n\nSee Also\n--------\nr_regression: Pearson's R between label/feature for regression tasks.\nf_classif: ANOVA F-value between label/feature for classification tasks.\nchi2: Chi-squared stats of non-negative features for classification tasks.\nSelectKBest: Select features based on the k highest scores.\nSelectFpr: Select features based on a false positive rate test.\nSelectFdr: Select features based on an estimated false discovery rate.\nSelectFwe: Select features based on family-wise error rate.\nSelectPercentile: Select features based on percentile of the highest\n scores." }, { @@ -118619,7 +118619,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.feature_selection"], - "description": "Compute Pearson's r for each features and the target.\n\nPearson's r is also known as the Pearson correlation coefficient.\n\nLinear model for testing the individual effect of each of many regressors.\nThis is a scoring function to be used in a feature selection procedure, not\na free standing feature selection procedure.\n\nThe cross correlation between each regressor and the target is computed\nas::\n\nE[(X[:, i] - mean(X[:, i])) * (y - mean(y))] / (std(X[:, i]) * std(y))\n\nFor more on usage see the :ref:`User Guide `.\n\n.. versionadded:: 1.0", + "description": "Compute Pearson's r for each features and the target.\n\nPearson's r is also known as the Pearson correlation coefficient.\n\nLinear model for testing the individual effect of each of many regressors.\nThis is a scoring function to be used in a feature selection procedure, not\na free standing feature selection procedure.\n\nThe cross correlation between each regressor and the target is computed\nas::\n\n E[(X[:, i] - mean(X[:, i])) * (y - mean(y))] / (std(X[:, i]) * std(y))\n\nFor more on usage see the :ref:`User Guide `.\n\n.. 
versionadded:: 1.0", "docstring": "Compute Pearson's r for each features and the target.\n\nPearson's r is also known as the Pearson correlation coefficient.\n\nLinear model for testing the individual effect of each of many regressors.\nThis is a scoring function to be used in a feature selection procedure, not\na free standing feature selection procedure.\n\nThe cross correlation between each regressor and the target is computed\nas::\n\n E[(X[:, i] - mean(X[:, i])) * (y - mean(y))] / (std(X[:, i]) * std(y))\n\nFor more on usage see the :ref:`User Guide `.\n\n.. versionadded:: 1.0\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n The data matrix.\n\ny : array-like of shape (n_samples,)\n The target vector.\n\ncenter : bool, default=True\n Whether or not to center the data matrix `X` and the target vector `y`.\n By default, `X` and `y` will be centered.\n\nforce_finite : bool, default=True\n Whether or not to force the Pearson's R correlation to be finite.\n In the particular case where some features in `X` or the target `y`\n are constant, the Pearson's R correlation is not defined. When\n `force_finite=False`, a correlation of `np.nan` is returned to\n acknowledge this case. When `force_finite=True`, this value will be\n forced to a minimal correlation of `0.0`.\n\n .. versionadded:: 1.1\n\nReturns\n-------\ncorrelation_coefficient : ndarray of shape (n_features,)\n Pearson's R correlation coefficients of features.\n\nSee Also\n--------\nf_regression: Univariate linear regression tests returning f-statistic\n and p-values.\nmutual_info_regression: Mutual information for a continuous target.\nf_classif: ANOVA F-value between label/feature for classification tasks.\nchi2: Chi-squared stats of non-negative features for classification tasks." }, { @@ -118837,7 +118837,7 @@ "docstring": { "type": "'fmin_l_bfgs_b' or callable", "default_value": "'fmin_l_bfgs_b'", - "description": "Can either be one of the internally supported optimizers for optimizing\nthe kernel's parameters, specified by a string, or an externally\ndefined optimizer passed as a callable. If a callable is passed, it\nmust have the signature::\n\ndef optimizer(obj_func, initial_theta, bounds):\n# * 'obj_func' is the objective function to be maximized, which\n# takes the hyperparameters theta as parameter and an\n# optional flag eval_gradient, which determines if the\n# gradient is returned additionally to the function value\n# * 'initial_theta': the initial value for theta, which can be\n# used by local optimizers\n# * 'bounds': the bounds on the values of theta\n....\n# Returned are the best found hyperparameters theta and\n# the corresponding value of the target function.\nreturn theta_opt, func_min\n\nPer default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\nis used. If None is passed, the kernel's parameters are kept fixed.\nAvailable internal optimizers are::\n\n'fmin_l_bfgs_b'" + "description": "Can either be one of the internally supported optimizers for optimizing\nthe kernel's parameters, specified by a string, or an externally\ndefined optimizer passed as a callable. 
If a callable is passed, it\nmust have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be maximized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\nPer default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\nis used. If None is passed, the kernel's parameters are kept fixed.\nAvailable internal optimizers are::\n\n 'fmin_l_bfgs_b'" }, "type": { "kind": "UnionType", @@ -118965,7 +118965,7 @@ }, "type": { "kind": "EnumType", - "values": ["one_vs_rest", "one_vs_one"] + "values": ["one_vs_one", "one_vs_rest"] } }, { @@ -119320,7 +119320,7 @@ "docstring": { "type": "'fmin_l_bfgs_b' or callable", "default_value": "'fmin_l_bfgs_b'", - "description": "Can either be one of the internally supported optimizers for optimizing\nthe kernel's parameters, specified by a string, or an externally\ndefined optimizer passed as a callable. If a callable is passed, it\nmust have the signature::\n\ndef optimizer(obj_func, initial_theta, bounds):\n# * 'obj_func' is the objective function to be maximized, which\n# takes the hyperparameters theta as parameter and an\n# optional flag eval_gradient, which determines if the\n# gradient is returned additionally to the function value\n# * 'initial_theta': the initial value for theta, which can be\n# used by local optimizers\n# * 'bounds': the bounds on the values of theta\n....\n# Returned are the best found hyperparameters theta and\n# the corresponding value of the target function.\nreturn theta_opt, func_min\n\nPer default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\nis used. If None is passed, the kernel's parameters are kept fixed.\nAvailable internal optimizers are::\n\n'fmin_l_bfgs_b'" + "description": "Can either be one of the internally supported optimizers for optimizing\nthe kernel's parameters, specified by a string, or an externally\ndefined optimizer passed as a callable. If a callable is passed, it\nmust have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func' is the objective function to be maximized, which\n # takes the hyperparameters theta as parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\nPer default, the 'L-BFGS-B' algorithm from scipy.optimize.minimize\nis used. If None is passed, the kernel's parameters are kept fixed.\nAvailable internal optimizers are::\n\n 'fmin_l_bfgs_b'" }, "type": { "kind": "UnionType", @@ -119892,7 +119892,7 @@ "docstring": { "type": "\"fmin_l_bfgs_b\" or callable", "default_value": "\"fmin_l_bfgs_b\"", - "description": "Can either be one of the internally supported optimizers for optimizing\nthe kernel's parameters, specified by a string, or an externally\ndefined optimizer passed as a callable. 
If a callable is passed, it\nmust have the signature::\n\ndef optimizer(obj_func, initial_theta, bounds):\n# * 'obj_func': the objective function to be minimized, which\n# takes the hyperparameters theta as a parameter and an\n# optional flag eval_gradient, which determines if the\n# gradient is returned additionally to the function value\n# * 'initial_theta': the initial value for theta, which can be\n# used by local optimizers\n# * 'bounds': the bounds on the values of theta\n....\n# Returned are the best found hyperparameters theta and\n# the corresponding value of the target function.\nreturn theta_opt, func_min\n\nPer default, the L-BFGS-B algorithm from `scipy.optimize.minimize`\nis used. If None is passed, the kernel's parameters are kept fixed.\nAvailable internal optimizers are: `{'fmin_l_bfgs_b'}`." + "description": "Can either be one of the internally supported optimizers for optimizing\nthe kernel's parameters, specified by a string, or an externally\ndefined optimizer passed as a callable. If a callable is passed, it\nmust have the signature::\n\n def optimizer(obj_func, initial_theta, bounds):\n # * 'obj_func': the objective function to be minimized, which\n # takes the hyperparameters theta as a parameter and an\n # optional flag eval_gradient, which determines if the\n # gradient is returned additionally to the function value\n # * 'initial_theta': the initial value for theta, which can be\n # used by local optimizers\n # * 'bounds': the bounds on the values of theta\n ....\n # Returned are the best found hyperparameters theta and\n # the corresponding value of the target function.\n return theta_opt, func_min\n\nPer default, the L-BFGS-B algorithm from `scipy.optimize.minimize`\nis used. If None is passed, the kernel's parameters are kept fixed.\nAvailable internal optimizers are: `{'fmin_l_bfgs_b'}`." }, "type": { "kind": "UnionType", @@ -119993,7 +119993,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of [1]_.\n\nIn addition to standard scikit-learn estimator API,\n:class:`GaussianProcessRegressor`:\n\n* allows prediction without prior fitting (based on the GP prior)\n* provides an additional method `sample_y(X)`, which evaluates samples\ndrawn from the GPR (prior or posterior) at given inputs\n* exposes a method `log_marginal_likelihood(theta)`, which can be used\nexternally for other ways of selecting hyperparameters, e.g., via\nMarkov chain Monte Carlo.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "Gaussian process regression (GPR).\n\nThe implementation is based on Algorithm 2.1 of [1]_.\n\nIn addition to standard scikit-learn estimator API,\n:class:`GaussianProcessRegressor`:\n\n * allows prediction without prior fitting (based on the GP prior)\n * provides an additional method `sample_y(X)`, which evaluates samples\n drawn from the GPR (prior or posterior) at given inputs\n * exposes a method `log_marginal_likelihood(theta)`, which can be used\n externally for other ways of selecting hyperparameters, e.g., via\n Markov chain Monte Carlo.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18", "docstring": "" }, { @@ -121020,7 +121020,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Constant kernel.\n\nCan be used as part of a product-kernel where it scales the magnitude of\nthe other factor (kernel) or as part of a sum-kernel, where it modifies\nthe mean of the Gaussian process.\n\n.. math::\nk(x_1, x_2) = constant\\_value \\;\\forall\\; x_1, x_2\n\nAdding a constant kernel is equivalent to adding a constant::\n\nkernel = RBF() + ConstantKernel(constant_value=2)\n\nis the same as::\n\nkernel = RBF() + 2\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "Constant kernel.\n\nCan be used as part of a product-kernel where it scales the magnitude of\nthe other factor (kernel) or as part of a sum-kernel, where it modifies\nthe mean of the Gaussian process.\n\n.. math::\n k(x_1, x_2) = constant\\_value \\;\\forall\\; x_1, x_2\n\nAdding a constant kernel is equivalent to adding a constant::\n\n kernel = RBF() + ConstantKernel(constant_value=2)\n\nis the same as::\n\n kernel = RBF() + 2\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "" }, { @@ -121275,7 +121275,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Dot-Product kernel.\n\nThe DotProduct kernel is non-stationary and can be obtained from linear\nregression by putting :math:`N(0, 1)` priors on the coefficients\nof :math:`x_d (d = 1, . . . , D)` and a prior of :math:`N(0, \\sigma_0^2)`\non the bias. The DotProduct kernel is invariant to a rotation of\nthe coordinates about the origin, but not translations.\nIt is parameterized by a parameter sigma_0 :math:`\\sigma`\nwhich controls the inhomogenity of the kernel. For :math:`\\sigma_0^2 =0`,\nthe kernel is called the homogeneous linear kernel, otherwise\nit is inhomogeneous. The kernel is given by\n\n.. math::\nk(x_i, x_j) = \\sigma_0 ^ 2 + x_i \\cdot x_j\n\nThe DotProduct kernel is commonly combined with exponentiation.\n\nSee [1]_, Chapter 4, Section 4.2, for further details regarding the\nDotProduct kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "Dot-Product kernel.\n\nThe DotProduct kernel is non-stationary and can be obtained from linear\nregression by putting :math:`N(0, 1)` priors on the coefficients\nof :math:`x_d (d = 1, . . . , D)` and a prior of :math:`N(0, \\sigma_0^2)`\non the bias. The DotProduct kernel is invariant to a rotation of\nthe coordinates about the origin, but not translations.\nIt is parameterized by a parameter sigma_0 :math:`\\sigma`\nwhich controls the inhomogenity of the kernel. For :math:`\\sigma_0^2 =0`,\nthe kernel is called the homogeneous linear kernel, otherwise\nit is inhomogeneous. The kernel is given by\n\n.. math::\n k(x_i, x_j) = \\sigma_0 ^ 2 + x_i \\cdot x_j\n\nThe DotProduct kernel is commonly combined with exponentiation.\n\nSee [1]_, Chapter 4, Section 4.2, for further details regarding the\nDotProduct kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "" }, { @@ -121591,7 +121591,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Exp-Sine-Squared kernel (aka periodic kernel).\n\nThe ExpSineSquared kernel allows one to model functions which repeat\nthemselves exactly. It is parameterized by a length scale\nparameter :math:`l>0` and a periodicity parameter :math:`p>0`.\nOnly the isotropic variant where :math:`l` is a scalar is\nsupported at the moment. The kernel is given by:\n\n.. 
math::\nk(x_i, x_j) = \\text{exp}\\left(-\n\\frac{ 2\\sin^2(\\pi d(x_i, x_j)/p) }{ l^ 2} \\right)\n\nwhere :math:`l` is the length scale of the kernel, :math:`p` the\nperiodicity of the kernel and :math:`d(\\\\cdot,\\\\cdot)` is the\nEuclidean distance.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "Exp-Sine-Squared kernel (aka periodic kernel).\n\nThe ExpSineSquared kernel allows one to model functions which repeat\nthemselves exactly. It is parameterized by a length scale\nparameter :math:`l>0` and a periodicity parameter :math:`p>0`.\nOnly the isotropic variant where :math:`l` is a scalar is\nsupported at the moment. The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\text{exp}\\left(-\n \\frac{ 2\\sin^2(\\pi d(x_i, x_j)/p) }{ l^ 2} \\right)\n\nwhere :math:`l` is the length scale of the kernel, :math:`p` the\nperiodicity of the kernel and :math:`d(\\\\cdot,\\\\cdot)` is the\nEuclidean distance.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "" }, { @@ -121870,7 +121870,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "The Exponentiation kernel takes one base kernel and a scalar parameter\n:math:`p` and combines them via\n\n.. math::\nk_{exp}(X, Y) = k(X, Y) ^p\n\nNote that the `__pow__` magic method is overridden, so\n`Exponentiation(RBF(), 2)` is equivalent to using the ** operator\nwith `RBF() ** 2`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "The Exponentiation kernel takes one base kernel and a scalar parameter\n:math:`p` and combines them via\n\n.. math::\n k_{exp}(X, Y) = k(X, Y) ^p\n\nNote that the `__pow__` magic method is overridden, so\n`Exponentiation(RBF(), 2)` is equivalent to using the ** operator\nwith `RBF() ** 2`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "" }, { @@ -123579,7 +123579,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Matern kernel.\n\nThe class of Matern kernels is a generalization of the :class:`RBF`.\nIt has an additional parameter :math:`\\nu` which controls the\nsmoothness of the resulting function. The smaller :math:`\\nu`,\nthe less smooth the approximated function is.\nAs :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to\nthe :class:`RBF` kernel. When :math:`\\nu = 1/2`, the Mat\u00e9rn kernel\nbecomes identical to the absolute exponential kernel.\nImportant intermediate values are\n:math:`\\nu=1.5` (once differentiable functions)\nand :math:`\\nu=2.5` (twice differentiable functions).\n\nThe kernel is given by:\n\n.. math::\nk(x_i, x_j) = \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(\n\\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\n\\Bigg)^\\nu K_\\nu\\Bigg(\n\\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)\n\nwhere :math:`d(\\cdot,\\cdot)` is the Euclidean distance,\n:math:`K_{\\nu}(\\cdot)` is a modified Bessel function and\n:math:`\\Gamma(\\cdot)` is the gamma function.\nSee [1]_, Chapter 4, Section 4.2, for details regarding the different\nvariants of the Matern kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "Matern kernel.\n\nThe class of Matern kernels is a generalization of the :class:`RBF`.\nIt has an additional parameter :math:`\\nu` which controls the\nsmoothness of the resulting function. The smaller :math:`\\nu`,\nthe less smooth the approximated function is.\nAs :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to\nthe :class:`RBF` kernel. 
When :math:`\\nu = 1/2`, the Mat\u00e9rn kernel\nbecomes identical to the absolute exponential kernel.\nImportant intermediate values are\n:math:`\\nu=1.5` (once differentiable functions)\nand :math:`\\nu=2.5` (twice differentiable functions).\n\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\n \\Bigg)^\\nu K_\\nu\\Bigg(\n \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)\n\nwhere :math:`d(\\cdot,\\cdot)` is the Euclidean distance,\n:math:`K_{\\nu}(\\cdot)` is a modified Bessel function and\n:math:`\\Gamma(\\cdot)` is the gamma function.\nSee [1]_, Chapter 4, Section 4.2, for details regarding the different\nvariants of the Matern kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "" }, { @@ -123812,15 +123812,15 @@ { "kind": "EnumType", "values": [ + "chi2", "laplacian", - "cosine", - "poly", "linear", "sigmoid", - "chi2", - "additive_chi2", "polynomial", - "rbf" + "poly", + "rbf", + "additive_chi2", + "cosine" ] }, { @@ -123851,7 +123851,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Wrapper for kernels in sklearn.metrics.pairwise.\n\nA thin wrapper around the functionality of the kernels in\nsklearn.metrics.pairwise.\n\nNote: Evaluation of eval_gradient is not analytic but numeric and all\nkernels support only isotropic distances. The parameter gamma is\nconsidered to be a hyperparameter and may be optimized. The other\nkernel parameters are set directly at initialization and are kept\nfixed.\n\n.. versionadded:: 0.18", + "description": "Wrapper for kernels in sklearn.metrics.pairwise.\n\nA thin wrapper around the functionality of the kernels in\nsklearn.metrics.pairwise.\n\nNote: Evaluation of eval_gradient is not analytic but numeric and all\n kernels support only isotropic distances. The parameter gamma is\n considered to be a hyperparameter and may be optimized. The other\n kernel parameters are set directly at initialization and are kept\n fixed.\n\n.. versionadded:: 0.18", "docstring": "" }, { @@ -124309,7 +124309,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Radial-basis function kernel (aka squared-exponential kernel).\n\nThe RBF kernel is a stationary kernel. It is also known as the\n\"squared exponential\" kernel. It is parameterized by a length scale\nparameter :math:`l>0`, which can either be a scalar (isotropic variant\nof the kernel) or a vector with the same number of dimensions as the inputs\nX (anisotropic variant of the kernel). The kernel is given by:\n\n.. math::\nk(x_i, x_j) = \\exp\\left(- \\frac{d(x_i, x_j)^2}{2l^2} \\right)\n\nwhere :math:`l` is the length scale of the kernel and\n:math:`d(\\cdot,\\cdot)` is the Euclidean distance.\nFor advice on how to set the length scale parameter, see e.g. [1]_.\n\nThis kernel is infinitely differentiable, which implies that GPs with this\nkernel as covariance function have mean square derivatives of all orders,\nand are thus very smooth.\nSee [2]_, Chapter 4, Section 4.2, for further details of the RBF kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "Radial-basis function kernel (aka squared-exponential kernel).\n\nThe RBF kernel is a stationary kernel. It is also known as the\n\"squared exponential\" kernel. 
It is parameterized by a length scale\nparameter :math:`l>0`, which can either be a scalar (isotropic variant\nof the kernel) or a vector with the same number of dimensions as the inputs\nX (anisotropic variant of the kernel). The kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\exp\\left(- \\frac{d(x_i, x_j)^2}{2l^2} \\right)\n\nwhere :math:`l` is the length scale of the kernel and\n:math:`d(\\cdot,\\cdot)` is the Euclidean distance.\nFor advice on how to set the length scale parameter, see e.g. [1]_.\n\nThis kernel is infinitely differentiable, which implies that GPs with this\nkernel as covariance function have mean square derivatives of all orders,\nand are thus very smooth.\nSee [2]_, Chapter 4, Section 4.2, for further details of the RBF kernel.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "" }, { @@ -124581,7 +124581,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Rational Quadratic kernel.\n\nThe RationalQuadratic kernel can be seen as a scale mixture (an infinite\nsum) of RBF kernels with different characteristic length scales. It is\nparameterized by a length scale parameter :math:`l>0` and a scale\nmixture parameter :math:`\\alpha>0`. Only the isotropic variant\nwhere length_scale :math:`l` is a scalar is supported at the moment.\nThe kernel is given by:\n\n.. math::\nk(x_i, x_j) = \\left(\n1 + \\frac{d(x_i, x_j)^2 }{ 2\\alpha l^2}\\right)^{-\\alpha}\n\nwhere :math:`\\alpha` is the scale mixture parameter, :math:`l` is\nthe length scale of the kernel and :math:`d(\\cdot,\\cdot)` is the\nEuclidean distance.\nFor advice on how to set the parameters, see e.g. [1]_.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "Rational Quadratic kernel.\n\nThe RationalQuadratic kernel can be seen as a scale mixture (an infinite\nsum) of RBF kernels with different characteristic length scales. It is\nparameterized by a length scale parameter :math:`l>0` and a scale\nmixture parameter :math:`\\alpha>0`. Only the isotropic variant\nwhere length_scale :math:`l` is a scalar is supported at the moment.\nThe kernel is given by:\n\n.. math::\n k(x_i, x_j) = \\left(\n 1 + \\frac{d(x_i, x_j)^2 }{ 2\\alpha l^2}\\right)^{-\\alpha}\n\nwhere :math:`\\alpha` is the scale mixture parameter, :math:`l` is\nthe length scale of the kernel and :math:`d(\\cdot,\\cdot)` is the\nEuclidean distance.\nFor advice on how to set the parameters, see e.g. [1]_.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", "docstring": "" }, { @@ -125031,7 +125031,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "White kernel.\n\nThe main use-case of this kernel is as part of a sum-kernel where it\nexplains the noise of the signal as independently and identically\nnormally-distributed. The parameter noise_level equals the variance of this\nnoise.\n\n.. math::\nk(x_1, x_2) = noise\\_level \\text{ if } x_i == x_j \\text{ else } 0\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.18", + "description": "White kernel.\n\nThe main use-case of this kernel is as part of a sum-kernel where it\nexplains the noise of the signal as independently and identically\nnormally-distributed. The parameter noise_level equals the variance of this\nnoise.\n\n.. math::\n k(x_1, x_2) = noise\\_level \\text{ if } x_i == x_j \\text{ else } 0\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.18", "docstring": "" }, { @@ -125319,11 +125319,11 @@ "docstring": { "type": "{'missing-only', 'all'}", "default_value": "'missing-only'", - "description": "Whether the imputer mask should represent all or a subset of\nfeatures.\n\n- If `'missing-only'` (default), the imputer mask will only represent\nfeatures containing missing values during fit time.\n- If `'all'`, the imputer mask will represent all features." + "description": "Whether the imputer mask should represent all or a subset of\nfeatures.\n\n- If `'missing-only'` (default), the imputer mask will only represent\n features containing missing values during fit time.\n- If `'all'`, the imputer mask will represent all features." }, "type": { "kind": "EnumType", - "values": ["missing-only", "all"] + "values": ["all", "missing-only"] } }, { @@ -125336,7 +125336,7 @@ "docstring": { "type": "bool or 'auto'", "default_value": "'auto'", - "description": "Whether the imputer mask format should be sparse or dense.\n\n- If `'auto'` (default), the imputer mask will be of same type as\ninput.\n- If `True`, the imputer mask will be a sparse matrix.\n- If `False`, the imputer mask will be a numpy array." + "description": "Whether the imputer mask format should be sparse or dense.\n\n- If `'auto'` (default), the imputer mask will be of same type as\n input.\n- If `True`, the imputer mask will be a sparse matrix.\n- If `False`, the imputer mask will be a numpy array." }, "type": { "kind": "UnionType", @@ -125765,7 +125765,7 @@ "docstring": { "type": "array-like of str or None", "default_value": "None", - "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\nused as feature names in. If `feature_names_in_` is not defined,\nthen the following input feature names are generated:\n`[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\nmatch `feature_names_in_` if `feature_names_in_` is defined." + "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\n used as feature names in. If `feature_names_in_` is not defined,\n then the following input feature names are generated:\n `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\n match `feature_names_in_` if `feature_names_in_` is defined." }, "type": { "kind": "UnionType", @@ -125913,7 +125913,7 @@ "docstring": { "type": "str", "default_value": "'mean'", - "description": "The imputation strategy.\n\n- If \"mean\", then replace missing values using the mean along\neach column. Can only be used with numeric data.\n- If \"median\", then replace missing values using the median along\neach column. Can only be used with numeric data.\n- If \"most_frequent\", then replace missing using the most frequent\nvalue along each column. Can be used with strings or numeric data.\nIf there is more than one such value, only the smallest is returned.\n- If \"constant\", then replace missing values with fill_value. Can be\nused with strings or numeric data.\n\n.. versionadded:: 0.20\nstrategy=\"constant\" for fixed value imputation." + "description": "The imputation strategy.\n\n- If \"mean\", then replace missing values using the mean along\n each column. Can only be used with numeric data.\n- If \"median\", then replace missing values using the median along\n each column. 
Can only be used with numeric data.\n- If \"most_frequent\", then replace missing using the most frequent\n value along each column. Can be used with strings or numeric data.\n If there is more than one such value, only the smallest is returned.\n- If \"constant\", then replace missing values with fill_value. Can be\n used with strings or numeric data.\n\n.. versionadded:: 0.20\n strategy=\"constant\" for fixed value imputation." }, "type": { "kind": "NamedType", @@ -125956,7 +125956,7 @@ "docstring": { "type": "int", "default_value": "0", - "description": "Controls the verbosity of the imputer.\n\n.. deprecated:: 1.1\nThe 'verbose' parameter was deprecated in version 1.1 and will be\nremoved in 1.3. A warning will always be raised upon the removal of\nempty columns in the future version." + "description": "Controls the verbosity of the imputer.\n\n.. deprecated:: 1.1\n The 'verbose' parameter was deprecated in version 1.1 and will be\n removed in 1.3. A warning will always be raised upon the removal of\n empty columns in the future version." }, "type": { "kind": "NamedType", @@ -126001,7 +126001,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Imputation transformer for completing missing values.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n`SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\nestimator which is now removed.", + "description": "Imputation transformer for completing missing values.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.20\n `SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\n estimator which is now removed.", "docstring": "" }, { @@ -126352,7 +126352,7 @@ "docstring": { "type": "array-like of str or None", "default_value": "None", - "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\nused as feature names in. If `feature_names_in_` is not defined,\nthen the following input feature names are generated:\n`[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\nmatch `feature_names_in_` if `feature_names_in_` is defined." + "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\n used as feature names in. If `feature_names_in_` is not defined,\n then the following input feature names are generated:\n `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\n match `feature_names_in_` if `feature_names_in_` is defined." }, "type": { "kind": "UnionType", @@ -126987,7 +126987,7 @@ }, "type": { "kind": "EnumType", - "values": ["median", "constant", "mean", "most_frequent"] + "values": ["constant", "median", "mean", "most_frequent"] } }, { @@ -127004,7 +127004,7 @@ }, "type": { "kind": "EnumType", - "values": ["roman", "descending", "ascending", "random", "arabic"] + "values": ["ascending", "arabic", "random", "descending", "roman"] } }, { @@ -127034,7 +127034,7 @@ "docstring": { "type": "float or array-like of shape (n_features,)", "default_value": "-np.inf", - "description": "Minimum possible imputed value. Broadcast to shape `(n_features,)` if\nscalar. If array-like, expects shape `(n_features,)`, one min value for\neach feature. The default is `-np.inf`.\n\n.. versionchanged:: 0.23\nAdded support for array-like." + "description": "Minimum possible imputed value. Broadcast to shape `(n_features,)` if\nscalar. 
If array-like, expects shape `(n_features,)`, one min value for\neach feature. The default is `-np.inf`.\n\n.. versionchanged:: 0.23\n Added support for array-like." }, "type": { "kind": "UnionType", @@ -127060,7 +127060,7 @@ "docstring": { "type": "float or array-like of shape (n_features,)", "default_value": "np.inf", - "description": "Maximum possible imputed value. Broadcast to shape `(n_features,)` if\nscalar. If array-like, expects shape `(n_features,)`, one max value for\neach feature. The default is `np.inf`.\n\n.. versionchanged:: 0.23\nAdded support for array-like." + "description": "Maximum possible imputed value. Broadcast to shape `(n_features,)` if\nscalar. If array-like, expects shape `(n_features,)`, one max value for\neach feature. The default is `np.inf`.\n\n.. versionchanged:: 0.23\n Added support for array-like." }, "type": { "kind": "UnionType", @@ -127144,7 +127144,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Multivariate imputer that estimates each feature from all the others.\n\nA strategy for imputing missing values by modeling each feature with\nmissing values as a function of other features in a round-robin fashion.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\n.. note::\n\nThis estimator is still **experimental** for now: the predictions\nand the API might change without any deprecation cycle. To use it,\nyou need to explicitly import `enable_iterative_imputer`::\n\n>>> # explicitly require this experimental feature\n>>> from sklearn.experimental import enable_iterative_imputer # noqa\n>>> # now you can import normally from sklearn.impute\n>>> from sklearn.impute import IterativeImputer", + "description": "Multivariate imputer that estimates each feature from all the others.\n\nA strategy for imputing missing values by modeling each feature with\nmissing values as a function of other features in a round-robin fashion.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.21\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import `enable_iterative_imputer`::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_iterative_imputer # noqa\n >>> # now you can import normally from sklearn.impute\n >>> from sklearn.impute import IterativeImputer", "docstring": "" }, { @@ -127781,7 +127781,7 @@ "docstring": { "type": "array-like of str or None", "default_value": "None", - "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\nused as feature names in. If `feature_names_in_` is not defined,\nthen the following input feature names are generated:\n`[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\nmatch `feature_names_in_` if `feature_names_in_` is defined." + "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\n used as feature names in. If `feature_names_in_` is not defined,\n then the following input feature names are generated:\n `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\n match `feature_names_in_` if `feature_names_in_` is defined." 
}, "type": { "kind": "UnionType", @@ -127933,14 +127933,14 @@ "docstring": { "type": "{'uniform', 'distance'} or callable", "default_value": "'uniform'", - "description": "Weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. All points in each neighborhood are\nweighted equally.\n- 'distance' : weight points by the inverse of their distance.\nin this case, closer neighbors of a query point will have a\ngreater influence than neighbors which are further away.\n- callable : a user-defined function which accepts an\narray of distances, and returns an array of the same shape\ncontaining the weights." + "description": "Weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. All points in each neighborhood are\n weighted equally.\n- 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n- callable : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["distance", "uniform"] + "values": ["uniform", "distance"] }, { "kind": "NamedType", @@ -127959,7 +127959,7 @@ "docstring": { "type": "{'nan_euclidean'} or callable", "default_value": "'nan_euclidean'", - "description": "Distance metric for searching neighbors. Possible values:\n\n- 'nan_euclidean'\n- callable : a user-defined function which conforms to the definition\nof ``_pairwise_callable(X, Y, metric, **kwds)``. The function\naccepts two arrays, X and Y, and a `missing_values` keyword in\n`kwds` and returns a scalar distance value." + "description": "Distance metric for searching neighbors. Possible values:\n\n- 'nan_euclidean'\n- callable : a user-defined function which conforms to the definition\n of ``_pairwise_callable(X, Y, metric, **kwds)``. The function\n accepts two arrays, X and Y, and a `missing_values` keyword in\n `kwds` and returns a scalar distance value." }, "type": { "kind": "UnionType", @@ -128202,7 +128202,7 @@ "docstring": { "type": "array-like of str or None", "default_value": "None", - "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\nused as feature names in. If `feature_names_in_` is not defined,\nthen the following input feature names are generated:\n`[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\nmatch `feature_names_in_` if `feature_names_in_` is defined." + "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\n used as feature names in. If `feature_names_in_` is not defined,\n then the following input feature names are generated:\n `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\n match `feature_names_in_` if `feature_names_in_` is defined." 
}, "type": { "kind": "UnionType", @@ -128596,7 +128596,7 @@ }, "type": { "kind": "EnumType", - "values": ["auto", "predict_proba", "decision_function"] + "values": ["decision_function", "predict_proba", "auto"] } }, { @@ -128643,11 +128643,11 @@ "docstring": { "type": "{'auto', 'recursion', 'brute'}", "default_value": "'auto'", - "description": "The method used to calculate the averaged predictions:\n\n- `'recursion'` is only supported for some tree-based estimators\n(namely\n:class:`~sklearn.ensemble.GradientBoostingClassifier`,\n:class:`~sklearn.ensemble.GradientBoostingRegressor`,\n:class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n:class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n:class:`~sklearn.tree.DecisionTreeRegressor`,\n:class:`~sklearn.ensemble.RandomForestRegressor`,\n) when `kind='average'`.\nThis is more efficient in terms of speed.\nWith this method, the target response of a\nclassifier is always the decision function, not the predicted\nprobabilities. Since the `'recursion'` method implicitly computes\nthe average of the Individual Conditional Expectation (ICE) by\ndesign, it is not compatible with ICE and thus `kind` must be\n`'average'`.\n\n- `'brute'` is supported for any estimator, but is more\ncomputationally intensive.\n\n- `'auto'`: the `'recursion'` is used for estimators that support it,\nand `'brute'` is used otherwise.\n\nPlease see :ref:`this note ` for\ndifferences between the `'brute'` and `'recursion'` method." + "description": "The method used to calculate the averaged predictions:\n\n- `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`,\n ) when `kind='average'`.\n This is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. Since the `'recursion'` method implicitly computes\n the average of the Individual Conditional Expectation (ICE) by\n design, it is not compatible with ICE and thus `kind` must be\n `'average'`.\n\n- `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n- `'auto'`: the `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\nPlease see :ref:`this note ` for\ndifferences between the `'brute'` and `'recursion'` method." }, "type": { "kind": "EnumType", - "values": ["brute", "auto", "recursion"] + "values": ["brute", "recursion", "auto"] } }, { @@ -128664,14 +128664,14 @@ }, "type": { "kind": "EnumType", - "values": ["both", "average", "individual"] + "values": ["average", "individual", "both"] } } ], "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.inspection"], - "description": "Partial dependence of ``features``.\n\nPartial dependence of a feature (or a set of features) corresponds to\nthe average response of an estimator for each possible value of the\nfeature.\n\nRead more in the :ref:`User Guide `.\n\n.. warning::\n\nFor :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n:class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n`'recursion'` method (used by default) will not account for the `init`\npredictor of the boosting process. 
In practice, this will produce\nthe same values as `'brute'` up to a constant offset in the target\nresponse, provided that `init` is a constant estimator (which is the\ndefault). However, if `init` is not a constant estimator, the\npartial dependence values are incorrect for `'recursion'` because the\noffset will be sample-dependent. It is preferable to use the `'brute'`\nmethod. Note that this only applies to\n:class:`~sklearn.ensemble.GradientBoostingClassifier` and\n:class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n:class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n:class:`~sklearn.ensemble.HistGradientBoostingRegressor`.", + "description": "Partial dependence of ``features``.\n\nPartial dependence of a feature (or a set of features) corresponds to\nthe average response of an estimator for each possible value of the\nfeature.\n\nRead more in the :ref:`User Guide `.\n\n.. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.", "docstring": "Partial dependence of ``features``.\n\nPartial dependence of a feature (or a set of features) corresponds to\nthe average response of an estimator for each possible value of the\nfeature.\n\nRead more in the :ref:`User Guide `.\n\n.. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\nParameters\n----------\nestimator : BaseEstimator\n A fitted estimator object implementing :term:`predict`,\n :term:`predict_proba`, or :term:`decision_function`.\n Multioutput-multiclass classifiers are not supported.\n\nX : {array-like or dataframe} of shape (n_samples, n_features)\n ``X`` is used to generate a grid of values for the target\n ``features`` (where the partial dependence will be evaluated), and\n also to generate values for the complement features when the\n `method` is 'brute'.\n\nfeatures : array-like of {int, str}\n The feature (e.g. 
`[0]`) or pair of interacting features\n (e.g. `[(0, 1)]`) for which the partial dependency should be computed.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'}, default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. For regressors\n this parameter is ignored and the response is always the output of\n :term:`predict`. By default, :term:`predict_proba` is tried first\n and we revert to :term:`decision_function` if it doesn't exist. If\n ``method`` is 'recursion', the response is always the output of\n :term:`decision_function`.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n The lower and upper percentile used to create the extreme values\n for the grid. Must be in [0, 1].\n\ngrid_resolution : int, default=100\n The number of equally spaced points on the grid, for each target\n feature.\n\nmethod : {'auto', 'recursion', 'brute'}, default='auto'\n The method used to calculate the averaged predictions:\n\n - `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`,\n ) when `kind='average'`.\n This is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. Since the `'recursion'` method implicitly computes\n the average of the Individual Conditional Expectation (ICE) by\n design, it is not compatible with ICE and thus `kind` must be\n `'average'`.\n\n - `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n - `'auto'`: the `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\n Please see :ref:`this note ` for\n differences between the `'brute'` and `'recursion'` method.\n\nkind : {'average', 'individual', 'both'}, default='average'\n Whether to return the partial dependence averaged across all the\n samples in the dataset or one line per sample or both.\n See Returns below.\n\n Note that the fast `method='recursion'` option is only available for\n `kind='average'`. Plotting individual dependencies requires using the\n slower `method='brute'` option.\n\n .. versionadded:: 0.24\n\nReturns\n-------\npredictions : :class:`~sklearn.utils.Bunch`\n Dictionary-like object, with the following attributes.\n\n individual : ndarray of shape (n_outputs, n_instances, len(values[0]), len(values[1]), ...)\n The predictions for all the points in the grid for all\n samples in X. This is also known as Individual\n Conditional Expectation (ICE)\n\n average : ndarray of shape (n_outputs, len(values[0]), len(values[1]), ...)\n The predictions for all the points in the grid, averaged\n over all samples in X (or over the training data if\n ``method`` is 'recursion').\n Only available when ``kind='both'``.\n\n values : seq of 1d ndarrays\n The values with which the grid has been created. The generated\n grid is a cartesian product of the arrays in ``values``.\n ``len(values) == len(features)``. 
The size of each array\n ``values[j]`` is either ``grid_resolution``, or the number of\n unique values in ``X[:, j]``, whichever is smaller.\n\n ``n_outputs`` corresponds to the number of classes in a multi-class\n setting, or to the number of tasks for multi-output regression.\n For classical regression and binary classification ``n_outputs==1``.\n ``n_values_feature_j`` corresponds to the size ``values[j]``.\n\nSee Also\n--------\nPartialDependenceDisplay.from_estimator : Plot Partial Dependence.\nPartialDependenceDisplay : Partial Dependence visualization.\n\nExamples\n--------\n>>> X = [[0, 0, 2], [1, 0, 0]]\n>>> y = [0, 1]\n>>> from sklearn.ensemble import GradientBoostingClassifier\n>>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)\n>>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),\n... grid_resolution=2) # doctest: +SKIP\n(array([[-4.52..., 4.52...]]), [array([ 0., 1.])])" }, { @@ -129036,7 +129036,7 @@ "docstring": { "type": "str, callable, list, tuple, or dict", "default_value": "None", - "description": "Scorer to use.\nIf `scoring` represents a single score, one can use:\n\n- a single string (see :ref:`scoring_parameter`);\n- a callable (see :ref:`scoring`) that returns a single value.\n\nIf `scoring` represents multiple scores, one can use:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where the keys are the metric\nnames and the values are the metric scores;\n- a dictionary with metric names as keys and callables a values.\n\nPassing multiple scores to `scoring` is more efficient than calling\n`permutation_importance` for each of the scores as it reuses\npredictions to avoid redundant computation.\n\nIf None, the estimator's default scorer is used." + "description": "Scorer to use.\nIf `scoring` represents a single score, one can use:\n\n- a single string (see :ref:`scoring_parameter`);\n- a callable (see :ref:`scoring`) that returns a single value.\n\nIf `scoring` represents multiple scores, one can use:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where the keys are the metric\n names and the values are the metric scores;\n- a dictionary with metric names as keys and callables a values.\n\nPassing multiple scores to `scoring` is more efficient than calling\n`permutation_importance` for each of the scores as it reuses\npredictions to avoid redundant computation.\n\nIf None, the estimator's default scorer is used." }, "type": { "kind": "UnionType", @@ -129160,7 +129160,7 @@ "docstring": { "type": "int or float", "default_value": "1.0", - "description": "The number of samples to draw from X to compute feature importance\nin each repeat (without replacement).\n\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples.\n- If `max_samples` is equal to `1.0` or `X.shape[0]`, all samples\nwill be used.\n\nWhile using this option may provide less accurate importance estimates,\nit keeps the method tractable when evaluating feature importance on\nlarge datasets. In combination with `n_repeats`, this allows to control\nthe computational speed vs statistical accuracy trade-off of this method.\n\n.. 
versionadded:: 1.0" + "description": "The number of samples to draw from X to compute feature importance\nin each repeat (without replacement).\n\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples.\n- If `max_samples` is equal to `1.0` or `X.shape[0]`, all samples\n will be used.\n\nWhile using this option may provide less accurate importance estimates,\nit keeps the method tractable when evaluating feature importance on\nlarge datasets. In combination with `n_repeats`, this allows to control\nthe computational speed vs statistical accuracy trade-off of this method.\n\n.. versionadded:: 1.0" }, "type": { "kind": "UnionType", @@ -129406,7 +129406,7 @@ }, "type": { "kind": "EnumType", - "values": ["contourf", "pcolormesh", "contour"] + "values": ["pcolormesh", "contour", "contourf"] } }, { @@ -129423,7 +129423,7 @@ }, "type": { "kind": "EnumType", - "values": ["predict", "auto", "predict_proba", "decision_function"] + "values": ["decision_function", "predict", "predict_proba", "auto"] } }, { @@ -129535,7 +129535,7 @@ }, "type": { "kind": "EnumType", - "values": ["contourf", "pcolormesh", "contour"] + "values": ["pcolormesh", "contour", "contourf"] } }, { @@ -129650,7 +129650,7 @@ }, "type": { "kind": "EnumType", - "values": ["predict", "auto", "predict_proba", "decision_function"] + "values": ["decision_function", "predict", "predict_proba", "auto"] } } ], @@ -129750,7 +129750,7 @@ "docstring": { "type": "int", "default_value": "", - "description": "- In a multiclass setting, specifies the class for which the PDPs\nshould be computed. Note that for binary classification, the\npositive class (index 1) is always used.\n- In a multioutput setting, specifies the task for which the PDPs\nshould be computed.\n\nIgnored in binary classification or classical regression settings." + "description": "- In a multiclass setting, specifies the class for which the PDPs\n should be computed. Note that for binary classification, the\n positive class (index 1) is always used.\n- In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\nIgnored in binary classification or classical regression settings." }, "type": { "kind": "NamedType", @@ -129784,7 +129784,7 @@ "docstring": { "type": "dict or None", "default_value": "", - "description": "Global min and max average predictions, such that all plots will have\nthe same scale and y limits. `pdp_lim[1]` is the global min and max for\nsingle partial dependence curves. `pdp_lim[2]` is the global min and\nmax for two-way partial dependence curves. If `None`, the limit will be\ninferred from the global minimum and maximum of all predictions.\n\n.. deprecated:: 1.1\nPass the parameter `pdp_lim` to\n:meth:`~sklearn.inspection.PartialDependenceDisplay.plot` instead.\nIt will be removed in 1.3." + "description": "Global min and max average predictions, such that all plots will have\nthe same scale and y limits. `pdp_lim[1]` is the global min and max for\nsingle partial dependence curves. `pdp_lim[2]` is the global min and\nmax for two-way partial dependence curves. If `None`, the limit will be\ninferred from the global minimum and maximum of all predictions.\n\n.. deprecated:: 1.1\n Pass the parameter `pdp_lim` to\n :meth:`~sklearn.inspection.PartialDependenceDisplay.plot` instead.\n It will be removed in 1.3." 
}, "type": { "kind": "UnionType", @@ -129810,14 +129810,14 @@ "docstring": { "type": "{'average', 'individual', 'both'} or list of such str", "default_value": "'average'", - "description": "Whether to plot the partial dependence averaged across all the samples\nin the dataset or one line per sample or both.\n\n- ``kind='average'`` results in the traditional PD plot;\n- ``kind='individual'`` results in the ICE plot;\n- ``kind='both'`` results in plotting both the ICE and PD on the same\nplot.\n\nA list of such strings can be provided to specify `kind` on a per-plot\nbasis. The length of the list should be the same as the number of\ninteraction requested in `features`.\n\n.. note::\nICE ('individual' or 'both') is not a valid option for 2-ways\ninteractions plot. As a result, an error will be raised.\n2-ways interaction plots should always be configured to\nuse the 'average' kind instead.\n\n.. note::\nThe fast ``method='recursion'`` option is only available for\n``kind='average'``. Plotting individual dependencies requires using\nthe slower ``method='brute'`` option.\n\n.. versionadded:: 0.24\nAdd `kind` parameter with `'average'`, `'individual'`, and `'both'`\noptions.\n\n.. versionadded:: 1.1\nAdd the possibility to pass a list of string specifying `kind`\nfor each plot." + "description": "Whether to plot the partial dependence averaged across all the samples\nin the dataset or one line per sample or both.\n\n- ``kind='average'`` results in the traditional PD plot;\n- ``kind='individual'`` results in the ICE plot;\n- ``kind='both'`` results in plotting both the ICE and PD on the same\n plot.\n\nA list of such strings can be provided to specify `kind` on a per-plot\nbasis. The length of the list should be the same as the number of\ninteraction requested in `features`.\n\n.. note::\n ICE ('individual' or 'both') is not a valid option for 2-ways\n interactions plot. As a result, an error will be raised.\n 2-ways interaction plots should always be configured to\n use the 'average' kind instead.\n\n.. note::\n The fast ``method='recursion'`` option is only available for\n ``kind='average'``. Plotting individual dependencies requires using\n the slower ``method='brute'`` option.\n\n.. versionadded:: 0.24\n Add `kind` parameter with `'average'`, `'individual'`, and `'both'`\n options.\n\n.. versionadded:: 1.1\n Add the possibility to pass a list of string specifying `kind`\n for each plot." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["both", "average", "individual"] + "values": ["average", "individual", "both"] }, { "kind": "NamedType", @@ -129890,7 +129890,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Partial Dependence Plot (PDP).\n\nThis can also display individual partial dependencies which are often\nreferred to as: Individual Condition Expectation (ICE).\n\nIt is recommended to use\n:func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` to create a\n:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\nstored as attributes.\n\nRead more in\n:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\nand the :ref:`User Guide `.\n\n.. 
versionadded:: 0.22", + "description": "Partial Dependence Plot (PDP).\n\nThis can also display individual partial dependencies which are often\nreferred to as: Individual Condition Expectation (ICE).\n\nIt is recommended to use\n:func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` to create a\n:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are\nstored as attributes.\n\nRead more in\n:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`\nand the :ref:`User Guide `.\n\n .. versionadded:: 0.22", "docstring": "" }, { @@ -130729,7 +130729,7 @@ "docstring": { "type": "int", "default_value": "None", - "description": "- In a multiclass setting, specifies the class for which the PDPs\nshould be computed. Note that for binary classification, the\npositive class (index 1) is always used.\n- In a multioutput setting, specifies the task for which the PDPs\nshould be computed.\n\nIgnored in binary classification or classical regression settings." + "description": "- In a multiclass setting, specifies the class for which the PDPs\n should be computed. Note that for binary classification, the\n positive class (index 1) is always used.\n- In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\nIgnored in binary classification or classical regression settings." }, "type": { "kind": "NamedType", @@ -130750,7 +130750,7 @@ }, "type": { "kind": "EnumType", - "values": ["auto", "predict_proba", "decision_function"] + "values": ["decision_function", "predict_proba", "auto"] } }, { @@ -130814,7 +130814,7 @@ "docstring": { "type": "str", "default_value": "'auto'", - "description": "The method used to calculate the averaged predictions:\n\n- `'recursion'` is only supported for some tree-based estimators\n(namely\n:class:`~sklearn.ensemble.GradientBoostingClassifier`,\n:class:`~sklearn.ensemble.GradientBoostingRegressor`,\n:class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n:class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n:class:`~sklearn.tree.DecisionTreeRegressor`,\n:class:`~sklearn.ensemble.RandomForestRegressor`\nbut is more efficient in terms of speed.\nWith this method, the target response of a\nclassifier is always the decision function, not the predicted\nprobabilities. Since the `'recursion'` method implicitly computes\nthe average of the ICEs by design, it is not compatible with ICE and\nthus `kind` must be `'average'`.\n\n- `'brute'` is supported for any estimator, but is more\ncomputationally intensive.\n\n- `'auto'`: the `'recursion'` is used for estimators that support it,\nand `'brute'` is used otherwise.\n\nPlease see :ref:`this note ` for\ndifferences between the `'brute'` and `'recursion'` method." + "description": "The method used to calculate the averaged predictions:\n\n- `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`\n but is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. 
Since the `'recursion'` method implicitly computes\n the average of the ICEs by design, it is not compatible with ICE and\n thus `kind` must be `'average'`.\n\n- `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n- `'auto'`: the `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\nPlease see :ref:`this note ` for\ndifferences between the `'brute'` and `'recursion'` method." }, "type": { "kind": "NamedType", @@ -130933,7 +130933,7 @@ "docstring": { "type": "Matplotlib axes or array-like of Matplotlib axes", "default_value": "None", - "description": "- If a single axis is passed in, it is treated as a bounding axes\nand a grid of partial dependence plots will be drawn within\nthese bounds. The `n_cols` parameter controls the number of\ncolumns in the grid.\n- If an array-like of axes are passed in, the partial dependence\nplots will be drawn directly into these axes.\n- If `None`, a figure and a bounding axes is created and treated\nas the single axes case." + "description": "- If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. The `n_cols` parameter controls the number of\n columns in the grid.\n- If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n- If `None`, a figure and a bounding axes is created and treated\n as the single axes case." }, "type": { "kind": "UnionType", @@ -130959,11 +130959,11 @@ "docstring": { "type": "{'average', 'individual', 'both'}", "default_value": "'average'", - "description": "Whether to plot the partial dependence averaged across all the samples\nin the dataset or one line per sample or both.\n\n- ``kind='average'`` results in the traditional PD plot;\n- ``kind='individual'`` results in the ICE plot.\n\nNote that the fast ``method='recursion'`` option is only available for\n``kind='average'``. Plotting individual dependencies requires using the\nslower ``method='brute'`` option." + "description": " Whether to plot the partial dependence averaged across all the samples\n in the dataset or one line per sample or both.\n\n - ``kind='average'`` results in the traditional PD plot;\n - ``kind='individual'`` results in the ICE plot.\n\nNote that the fast ``method='recursion'`` option is only available for\n``kind='average'``. Plotting individual dependencies requires using the\nslower ``method='brute'`` option." }, "type": { "kind": "EnumType", - "values": ["both", "average", "individual"] + "values": ["average", "individual", "both"] } }, { @@ -131047,7 +131047,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Partial dependence (PD) and individual conditional expectation (ICE) plots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter. The ``len(features)`` plots are arranged in a grid with\n``n_cols`` columns. Two-way partial dependence plots are plotted as\ncontour plots. The deciles of the feature values will be shown with tick\nmarks on the x-axes for one-way plots, and on both axes for two-way\nplots.\n\nRead more in the :ref:`User Guide `.\n\n.. note::\n\n:func:`PartialDependenceDisplay.from_estimator` does not support using the\nsame axes with multiple calls. 
To plot the partial dependence for\nmultiple estimators, please pass the axes created by the first call to the\nsecond call::\n\n>>> from sklearn.inspection import PartialDependenceDisplay\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.linear_model import LinearRegression\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> X, y = make_friedman1()\n>>> est1 = LinearRegression().fit(X, y)\n>>> est2 = RandomForestRegressor().fit(X, y)\n>>> disp1 = PartialDependenceDisplay.from_estimator(est1, X,\n... [1, 2])\n>>> disp2 = PartialDependenceDisplay.from_estimator(est2, X, [1, 2],\n... ax=disp1.axes_)\n\n.. warning::\n\nFor :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n:class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n`'recursion'` method (used by default) will not account for the `init`\npredictor of the boosting process. In practice, this will produce\nthe same values as `'brute'` up to a constant offset in the target\nresponse, provided that `init` is a constant estimator (which is the\ndefault). However, if `init` is not a constant estimator, the\npartial dependence values are incorrect for `'recursion'` because the\noffset will be sample-dependent. It is preferable to use the `'brute'`\nmethod. Note that this only applies to\n:class:`~sklearn.ensemble.GradientBoostingClassifier` and\n:class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n:class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n:class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n.. versionadded:: 1.0", + "description": "Partial dependence (PD) and individual conditional expectation (ICE) plots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter. The ``len(features)`` plots are arranged in a grid with\n``n_cols`` columns. Two-way partial dependence plots are plotted as\ncontour plots. The deciles of the feature values will be shown with tick\nmarks on the x-axes for one-way plots, and on both axes for two-way\nplots.\n\nRead more in the :ref:`User Guide `.\n\n.. note::\n\n :func:`PartialDependenceDisplay.from_estimator` does not support using the\n same axes with multiple calls. To plot the partial dependence for\n multiple estimators, please pass the axes created by the first call to the\n second call::\n\n >>> from sklearn.inspection import PartialDependenceDisplay\n >>> from sklearn.datasets import make_friedman1\n >>> from sklearn.linear_model import LinearRegression\n >>> from sklearn.ensemble import RandomForestRegressor\n >>> X, y = make_friedman1()\n >>> est1 = LinearRegression().fit(X, y)\n >>> est2 = RandomForestRegressor().fit(X, y)\n >>> disp1 = PartialDependenceDisplay.from_estimator(est1, X,\n ... [1, 2])\n >>> disp2 = PartialDependenceDisplay.from_estimator(est2, X, [1, 2],\n ... ax=disp1.axes_)\n\n.. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. 
Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n.. versionadded:: 1.0", "docstring": "Partial dependence (PD) and individual conditional expectation (ICE) plots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter. The ``len(features)`` plots are arranged in a grid with\n``n_cols`` columns. Two-way partial dependence plots are plotted as\ncontour plots. The deciles of the feature values will be shown with tick\nmarks on the x-axes for one-way plots, and on both axes for two-way\nplots.\n\nRead more in the :ref:`User Guide `.\n\n.. note::\n\n :func:`PartialDependenceDisplay.from_estimator` does not support using the\n same axes with multiple calls. To plot the partial dependence for\n multiple estimators, please pass the axes created by the first call to the\n second call::\n\n >>> from sklearn.inspection import PartialDependenceDisplay\n >>> from sklearn.datasets import make_friedman1\n >>> from sklearn.linear_model import LinearRegression\n >>> from sklearn.ensemble import RandomForestRegressor\n >>> X, y = make_friedman1()\n >>> est1 = LinearRegression().fit(X, y)\n >>> est2 = RandomForestRegressor().fit(X, y)\n >>> disp1 = PartialDependenceDisplay.from_estimator(est1, X,\n ... [1, 2])\n >>> disp2 = PartialDependenceDisplay.from_estimator(est2, X, [1, 2],\n ... ax=disp1.axes_)\n\n.. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n.. versionadded:: 1.0\n\nParameters\n----------\nestimator : BaseEstimator\n A fitted estimator object implementing :term:`predict`,\n :term:`predict_proba`, or :term:`decision_function`.\n Multioutput-multiclass classifiers are not supported.\n\nX : {array-like, dataframe} of shape (n_samples, n_features)\n ``X`` is used to generate a grid of values for the target\n ``features`` (where the partial dependence will be evaluated), and\n also to generate values for the complement features when the\n `method` is `'brute'`.\n\nfeatures : list of {int, str, pair of int, pair of str}\n The target features for which to create the PDPs.\n If `features[i]` is an integer or a string, a one-way PDP is created;\n if `features[i]` is a tuple, a two-way PDP is created (only supported\n with `kind='average'`). 
Each tuple must be of size 2.\n if any entry is a string, then it must be in ``feature_names``.\n\nfeature_names : array-like of shape (n_features,), dtype=str, default=None\n Name of each feature; `feature_names[i]` holds the name of the feature\n with index `i`.\n By default, the name of the feature corresponds to their numerical\n index for NumPy array and their column name for pandas dataframe.\n\ntarget : int, default=None\n - In a multiclass setting, specifies the class for which the PDPs\n should be computed. Note that for binary classification, the\n positive class (index 1) is always used.\n - In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\n Ignored in binary classification or classical regression settings.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'}, default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. For regressors\n this parameter is ignored and the response is always the output of\n :term:`predict`. By default, :term:`predict_proba` is tried first\n and we revert to :term:`decision_function` if it doesn't exist. If\n ``method`` is `'recursion'`, the response is always the output of\n :term:`decision_function`.\n\nn_cols : int, default=3\n The maximum number of columns in the grid plot. Only active when `ax`\n is a single axis or `None`.\n\ngrid_resolution : int, default=100\n The number of equally spaced points on the axes of the plots, for each\n target feature.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n The lower and upper percentile used to create the extreme values\n for the PDP axes. Must be in [0, 1].\n\nmethod : str, default='auto'\n The method used to calculate the averaged predictions:\n\n - `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`\n but is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. Since the `'recursion'` method implicitly computes\n the average of the ICEs by design, it is not compatible with ICE and\n thus `kind` must be `'average'`.\n\n - `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n - `'auto'`: the `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\n Please see :ref:`this note ` for\n differences between the `'brute'` and `'recursion'` method.\n\nn_jobs : int, default=None\n The number of CPUs to use to compute the partial dependences.\n Computation is parallelized over features specified by the `features`\n parameter.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n Verbose output during PD computations.\n\nline_kw : dict, default=None\n Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n For one-way partial dependence plots. 
It can be used to define common\n properties for both `ice_lines_kw` and `pdp_line_kw`.\n\nice_lines_kw : dict, default=None\n Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n For ICE lines in the one-way partial dependence plots.\n The key value pairs defined in `ice_lines_kw` takes priority over\n `line_kw`.\n\npd_line_kw : dict, default=None\n Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n For partial dependence in one-way partial dependence plots.\n The key value pairs defined in `pd_line_kw` takes priority over\n `line_kw`.\n\ncontour_kw : dict, default=None\n Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n For two-way partial dependence plots.\n\nax : Matplotlib axes or array-like of Matplotlib axes, default=None\n - If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. The `n_cols` parameter controls the number of\n columns in the grid.\n - If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n - If `None`, a figure and a bounding axes is created and treated\n as the single axes case.\n\nkind : {'average', 'individual', 'both'}, default='average'\n Whether to plot the partial dependence averaged across all the samples\n in the dataset or one line per sample or both.\n\n - ``kind='average'`` results in the traditional PD plot;\n - ``kind='individual'`` results in the ICE plot.\n\n Note that the fast ``method='recursion'`` option is only available for\n ``kind='average'``. Plotting individual dependencies requires using the\n slower ``method='brute'`` option.\n\ncentered : bool, default=False\n If `True`, the ICE and PD lines will start at the origin of the\n y-axis. By default, no centering is done.\n\n .. versionadded:: 1.1\n\nsubsample : float, int or None, default=1000\n Sampling for ICE curves when `kind` is 'individual' or 'both'.\n If `float`, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to be used to plot ICE curves. If `int`, represents the\n absolute number samples to use.\n\n Note that the full dataset is still used to calculate averaged partial\n dependence when `kind='both'`.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the selected samples when subsamples is not\n `None` and `kind` is either `'both'` or `'individual'`.\n See :term:`Glossary ` for details.\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> from sklearn.inspection import PartialDependenceDisplay\n>>> X, y = make_friedman1()\n>>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n>>> PartialDependenceDisplay.from_estimator(clf, X, [0, (0, 1)])\n<...>\n>>> plt.show()" }, { @@ -131080,7 +131080,7 @@ "docstring": { "type": "Matplotlib axes or array-like of Matplotlib axes", "default_value": "None", - "description": "- If a single axis is passed in, it is treated as a bounding axes\nand a grid of partial dependence plots will be drawn within\nthese bounds. 
The `n_cols` parameter controls the number of\ncolumns in the grid.\n- If an array-like of axes are passed in, the partial dependence\nplots will be drawn directly into these axes.\n- If `None`, a figure and a bounding axes is created and treated\nas the single axes case." + "description": "- If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. The `n_cols` parameter controls the number of\n columns in the grid.\n- If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n- If `None`, a figure and a bounding axes is created and treated\n as the single axes case." }, "type": { "kind": "UnionType", @@ -131642,7 +131642,7 @@ "docstring": { "type": "int", "default_value": "None", - "description": "- In a multiclass setting, specifies the class for which the PDPs\nshould be computed. Note that for binary classification, the\npositive class (index 1) is always used.\n- In a multioutput setting, specifies the task for which the PDPs\nshould be computed.\n\nIgnored in binary classification or classical regression settings." + "description": "- In a multiclass setting, specifies the class for which the PDPs\n should be computed. Note that for binary classification, the\n positive class (index 1) is always used.\n- In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\nIgnored in binary classification or classical regression settings." }, "type": { "kind": "NamedType", @@ -131663,7 +131663,7 @@ }, "type": { "kind": "EnumType", - "values": ["auto", "predict_proba", "decision_function"] + "values": ["decision_function", "predict_proba", "auto"] } }, { @@ -131727,7 +131727,7 @@ "docstring": { "type": "str", "default_value": "'auto'", - "description": "The method used to calculate the averaged predictions:\n\n- `'recursion'` is only supported for some tree-based estimators\n(namely\n:class:`~sklearn.ensemble.GradientBoostingClassifier`,\n:class:`~sklearn.ensemble.GradientBoostingRegressor`,\n:class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n:class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n:class:`~sklearn.tree.DecisionTreeRegressor`,\n:class:`~sklearn.ensemble.RandomForestRegressor`\nbut is more efficient in terms of speed.\nWith this method, the target response of a\nclassifier is always the decision function, not the predicted\nprobabilities. Since the `'recursion'` method implicitly computes\nthe average of the ICEs by design, it is not compatible with ICE and\nthus `kind` must be `'average'`.\n\n- `'brute'` is supported for any estimator, but is more\ncomputationally intensive.\n\n- `'auto'`: the `'recursion'` is used for estimators that support it,\nand `'brute'` is used otherwise.\n\nPlease see :ref:`this note ` for\ndifferences between the `'brute'` and `'recursion'` method." 
+ "description": "The method used to calculate the averaged predictions:\n\n- `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`\n but is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. Since the `'recursion'` method implicitly computes\n the average of the ICEs by design, it is not compatible with ICE and\n thus `kind` must be `'average'`.\n\n- `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n- `'auto'`: the `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\nPlease see :ref:`this note ` for\ndifferences between the `'brute'` and `'recursion'` method." }, "type": { "kind": "NamedType", @@ -131846,7 +131846,7 @@ "docstring": { "type": "Matplotlib axes or array-like of Matplotlib axes", "default_value": "None", - "description": "- If a single axis is passed in, it is treated as a bounding axes\nand a grid of partial dependence plots will be drawn within\nthese bounds. The `n_cols` parameter controls the number of\ncolumns in the grid.\n- If an array-like of axes are passed in, the partial dependence\nplots will be drawn directly into these axes.\n- If `None`, a figure and a bounding axes is created and treated\nas the single axes case.\n\n.. versionadded:: 0.22" + "description": "- If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. The `n_cols` parameter controls the number of\n columns in the grid.\n- If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n- If `None`, a figure and a bounding axes is created and treated\n as the single axes case.\n\n.. versionadded:: 0.22" }, "type": { "kind": "UnionType", @@ -131872,14 +131872,14 @@ "docstring": { "type": "{'average', 'individual', 'both'} or list of such str", "default_value": "'average'", - "description": "Whether to plot the partial dependence averaged across all the samples\nin the dataset or one line per sample or both.\n\n- ``kind='average'`` results in the traditional PD plot;\n- ``kind='individual'`` results in the ICE plot;\n- ``kind='both'`` results in plotting both the ICE and PD on the same\nplot.\n\nA list of such strings can be provided to specify `kind` on a per-plot\nbasis. The length of the list should be the same as the number of\ninteraction requested in `features`.\n\n.. note::\nICE ('individual' or 'both') is not a valid option for 2-ways\ninteractions plot. As a result, an error will be raised.\n2-ways interaction plots should always be configured to\nuse the 'average' kind instead.\n\n.. note::\nThe fast ``method='recursion'`` option is only available for\n``kind='average'``. Plotting individual dependencies requires using\nthe slower ``method='brute'`` option.\n\n.. versionadded:: 0.24\nAdd `kind` parameter with `'average'`, `'individual'`, and `'both'`\noptions.\n\n.. versionadded:: 1.1\nAdd the possibility to pass a list of string specifying `kind`\nfor each plot." 
+ "description": "Whether to plot the partial dependence averaged across all the samples\nin the dataset or one line per sample or both.\n\n- ``kind='average'`` results in the traditional PD plot;\n- ``kind='individual'`` results in the ICE plot;\n- ``kind='both'`` results in plotting both the ICE and PD on the same\n plot.\n\nA list of such strings can be provided to specify `kind` on a per-plot\nbasis. The length of the list should be the same as the number of\ninteraction requested in `features`.\n\n.. note::\n ICE ('individual' or 'both') is not a valid option for 2-ways\n interactions plot. As a result, an error will be raised.\n 2-ways interaction plots should always be configured to\n use the 'average' kind instead.\n\n.. note::\n The fast ``method='recursion'`` option is only available for\n ``kind='average'``. Plotting individual dependencies requires using\n the slower ``method='brute'`` option.\n\n.. versionadded:: 0.24\n Add `kind` parameter with `'average'`, `'individual'`, and `'both'`\n options.\n\n.. versionadded:: 1.1\n Add the possibility to pass a list of string specifying `kind`\n for each plot." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["both", "average", "individual"] + "values": ["average", "individual", "both"] }, { "kind": "NamedType", @@ -131969,7 +131969,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.inspection"], - "description": "Partial dependence (PD) and individual conditional expectation (ICE)\nplots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter.\n\nThe ICE and PD plots can be centered with the parameter `centered`.\n\nThe ``len(features)`` plots are arranged in a grid with ``n_cols``\ncolumns. Two-way partial dependence plots are plotted as contour plots. The\ndeciles of the feature values will be shown with tick marks on the x-axes\nfor one-way plots, and on both axes for two-way plots.\n\nRead more in the :ref:`User Guide `.\n\n.. note::\n\n:func:`plot_partial_dependence` does not support using the same axes\nwith multiple calls. To plot the partial dependence for multiple\nestimators, please pass the axes created by the first call to the\nsecond call::\n\n>>> from sklearn.inspection import plot_partial_dependence\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.linear_model import LinearRegression\n>>> from sklearn.ensemble import RandomForestRegressor\n>>> X, y = make_friedman1()\n>>> est1 = LinearRegression().fit(X, y)\n>>> est2 = RandomForestRegressor().fit(X, y)\n>>> disp1 = plot_partial_dependence(est1, X,\n... [1, 2]) # doctest: +SKIP\n>>> disp2 = plot_partial_dependence(est2, X, [1, 2],\n... ax=disp1.axes_) # doctest: +SKIP\n\n.. warning::\n\nFor :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n:class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n`'recursion'` method (used by default) will not account for the `init`\npredictor of the boosting process. In practice, this will produce\nthe same values as `'brute'` up to a constant offset in the target\nresponse, provided that `init` is a constant estimator (which is the\ndefault). However, if `init` is not a constant estimator, the\npartial dependence values are incorrect for `'recursion'` because the\noffset will be sample-dependent. It is preferable to use the `'brute'`\nmethod. 
Note that this only applies to\n:class:`~sklearn.ensemble.GradientBoostingClassifier` and\n:class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n:class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n:class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n.. deprecated:: 1.0\n`plot_partial_dependence` is deprecated in 1.0 and will be removed in\n1.2. Please use the class method:\n:func:`~sklearn.metrics.PartialDependenceDisplay.from_estimator`.", + "description": "Partial dependence (PD) and individual conditional expectation (ICE)\nplots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter.\n\nThe ICE and PD plots can be centered with the parameter `centered`.\n\nThe ``len(features)`` plots are arranged in a grid with ``n_cols``\ncolumns. Two-way partial dependence plots are plotted as contour plots. The\ndeciles of the feature values will be shown with tick marks on the x-axes\nfor one-way plots, and on both axes for two-way plots.\n\nRead more in the :ref:`User Guide `.\n\n.. note::\n\n :func:`plot_partial_dependence` does not support using the same axes\n with multiple calls. To plot the partial dependence for multiple\n estimators, please pass the axes created by the first call to the\n second call::\n\n >>> from sklearn.inspection import plot_partial_dependence\n >>> from sklearn.datasets import make_friedman1\n >>> from sklearn.linear_model import LinearRegression\n >>> from sklearn.ensemble import RandomForestRegressor\n >>> X, y = make_friedman1()\n >>> est1 = LinearRegression().fit(X, y)\n >>> est2 = RandomForestRegressor().fit(X, y)\n >>> disp1 = plot_partial_dependence(est1, X,\n ... [1, 2]) # doctest: +SKIP\n >>> disp2 = plot_partial_dependence(est2, X, [1, 2],\n ... ax=disp1.axes_) # doctest: +SKIP\n\n.. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n.. deprecated:: 1.0\n `plot_partial_dependence` is deprecated in 1.0 and will be removed in\n 1.2. Please use the class method:\n :func:`~sklearn.metrics.PartialDependenceDisplay.from_estimator`.", "docstring": "Partial dependence (PD) and individual conditional expectation (ICE)\nplots.\n\nPartial dependence plots, individual conditional expectation plots or an\noverlay of both of them can be plotted by setting the ``kind``\nparameter.\n\nThe ICE and PD plots can be centered with the parameter `centered`.\n\nThe ``len(features)`` plots are arranged in a grid with ``n_cols``\ncolumns. Two-way partial dependence plots are plotted as contour plots. 
The\ndeciles of the feature values will be shown with tick marks on the x-axes\nfor one-way plots, and on both axes for two-way plots.\n\nRead more in the :ref:`User Guide `.\n\n.. note::\n\n :func:`plot_partial_dependence` does not support using the same axes\n with multiple calls. To plot the partial dependence for multiple\n estimators, please pass the axes created by the first call to the\n second call::\n\n >>> from sklearn.inspection import plot_partial_dependence\n >>> from sklearn.datasets import make_friedman1\n >>> from sklearn.linear_model import LinearRegression\n >>> from sklearn.ensemble import RandomForestRegressor\n >>> X, y = make_friedman1()\n >>> est1 = LinearRegression().fit(X, y)\n >>> est2 = RandomForestRegressor().fit(X, y)\n >>> disp1 = plot_partial_dependence(est1, X,\n ... [1, 2]) # doctest: +SKIP\n >>> disp2 = plot_partial_dependence(est2, X, [1, 2],\n ... ax=disp1.axes_) # doctest: +SKIP\n\n.. warning::\n\n For :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, the\n `'recursion'` method (used by default) will not account for the `init`\n predictor of the boosting process. In practice, this will produce\n the same values as `'brute'` up to a constant offset in the target\n response, provided that `init` is a constant estimator (which is the\n default). However, if `init` is not a constant estimator, the\n partial dependence values are incorrect for `'recursion'` because the\n offset will be sample-dependent. It is preferable to use the `'brute'`\n method. Note that this only applies to\n :class:`~sklearn.ensemble.GradientBoostingClassifier` and\n :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.\n\n.. deprecated:: 1.0\n `plot_partial_dependence` is deprecated in 1.0 and will be removed in\n 1.2. Please use the class method:\n :func:`~sklearn.metrics.PartialDependenceDisplay.from_estimator`.\n\nParameters\n----------\nestimator : BaseEstimator\n A fitted estimator object implementing :term:`predict`,\n :term:`predict_proba`, or :term:`decision_function`.\n Multioutput-multiclass classifiers are not supported.\n\nX : {array-like, dataframe} of shape (n_samples, n_features)\n ``X`` is used to generate a grid of values for the target\n ``features`` (where the partial dependence will be evaluated), and\n also to generate values for the complement features when the\n `method` is `'brute'`.\n\nfeatures : list of {int, str, pair of int, pair of str}\n The target features for which to create the PDPs.\n If `features[i]` is an integer or a string, a one-way PDP is created;\n if `features[i]` is a tuple, a two-way PDP is created (only supported\n with `kind='average'`). Each tuple must be of size 2.\n if any entry is a string, then it must be in ``feature_names``.\n\nfeature_names : array-like of shape (n_features,), dtype=str, default=None\n Name of each feature; `feature_names[i]` holds the name of the feature\n with index `i`.\n By default, the name of the feature corresponds to their numerical\n index for NumPy array and their column name for pandas dataframe.\n\ntarget : int, default=None\n - In a multiclass setting, specifies the class for which the PDPs\n should be computed. 
Note that for binary classification, the\n positive class (index 1) is always used.\n - In a multioutput setting, specifies the task for which the PDPs\n should be computed.\n\n Ignored in binary classification or classical regression settings.\n\nresponse_method : {'auto', 'predict_proba', 'decision_function'}, default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. For regressors\n this parameter is ignored and the response is always the output of\n :term:`predict`. By default, :term:`predict_proba` is tried first\n and we revert to :term:`decision_function` if it doesn't exist. If\n ``method`` is `'recursion'`, the response is always the output of\n :term:`decision_function`.\n\nn_cols : int, default=3\n The maximum number of columns in the grid plot. Only active when `ax`\n is a single axis or `None`.\n\ngrid_resolution : int, default=100\n The number of equally spaced points on the axes of the plots, for each\n target feature.\n\npercentiles : tuple of float, default=(0.05, 0.95)\n The lower and upper percentile used to create the extreme values\n for the PDP axes. Must be in [0, 1].\n\nmethod : str, default='auto'\n The method used to calculate the averaged predictions:\n\n - `'recursion'` is only supported for some tree-based estimators\n (namely\n :class:`~sklearn.ensemble.GradientBoostingClassifier`,\n :class:`~sklearn.ensemble.GradientBoostingRegressor`,\n :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,\n :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,\n :class:`~sklearn.tree.DecisionTreeRegressor`,\n :class:`~sklearn.ensemble.RandomForestRegressor`\n but is more efficient in terms of speed.\n With this method, the target response of a\n classifier is always the decision function, not the predicted\n probabilities. Since the `'recursion'` method implicitly computes\n the average of the ICEs by design, it is not compatible with ICE and\n thus `kind` must be `'average'`.\n\n - `'brute'` is supported for any estimator, but is more\n computationally intensive.\n\n - `'auto'`: the `'recursion'` is used for estimators that support it,\n and `'brute'` is used otherwise.\n\n Please see :ref:`this note ` for\n differences between the `'brute'` and `'recursion'` method.\n\nn_jobs : int, default=None\n The number of CPUs to use to compute the partial dependences.\n Computation is parallelized over features specified by the `features`\n parameter.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nverbose : int, default=0\n Verbose output during PD computations.\n\nline_kw : dict, default=None\n Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.\n For one-way partial dependence plots. It can be used to define common\n properties for both `ice_lines_kw` and `pdp_line_kw`.\n\nice_lines_kw : dict, default=None\n Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n For ICE lines in the one-way partial dependence plots.\n The key value pairs defined in `ice_lines_kw` takes priority over\n `line_kw`.\n\n .. versionadded:: 1.0\n\npd_line_kw : dict, default=None\n Dictionary with keywords passed to the `matplotlib.pyplot.plot` call.\n For partial dependence in one-way partial dependence plots.\n The key value pairs defined in `pd_line_kw` takes priority over\n `line_kw`.\n\n .. 
versionadded:: 1.0\n\ncontour_kw : dict, default=None\n Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.\n For two-way partial dependence plots.\n\nax : Matplotlib axes or array-like of Matplotlib axes, default=None\n - If a single axis is passed in, it is treated as a bounding axes\n and a grid of partial dependence plots will be drawn within\n these bounds. The `n_cols` parameter controls the number of\n columns in the grid.\n - If an array-like of axes are passed in, the partial dependence\n plots will be drawn directly into these axes.\n - If `None`, a figure and a bounding axes is created and treated\n as the single axes case.\n\n .. versionadded:: 0.22\n\nkind : {'average', 'individual', 'both'} or list of such str, default='average'\n Whether to plot the partial dependence averaged across all the samples\n in the dataset or one line per sample or both.\n\n - ``kind='average'`` results in the traditional PD plot;\n - ``kind='individual'`` results in the ICE plot;\n - ``kind='both'`` results in plotting both the ICE and PD on the same\n plot.\n\n A list of such strings can be provided to specify `kind` on a per-plot\n basis. The length of the list should be the same as the number of\n interaction requested in `features`.\n\n .. note::\n ICE ('individual' or 'both') is not a valid option for 2-ways\n interactions plot. As a result, an error will be raised.\n 2-ways interaction plots should always be configured to\n use the 'average' kind instead.\n\n .. note::\n The fast ``method='recursion'`` option is only available for\n ``kind='average'``. Plotting individual dependencies requires using\n the slower ``method='brute'`` option.\n\n .. versionadded:: 0.24\n Add `kind` parameter with `'average'`, `'individual'`, and `'both'`\n options.\n\n .. versionadded:: 1.1\n Add the possibility to pass a list of string specifying `kind`\n for each plot.\n\nsubsample : float, int or None, default=1000\n Sampling for ICE curves when `kind` is 'individual' or 'both'.\n If `float`, should be between 0.0 and 1.0 and represent the proportion\n of the dataset to be used to plot ICE curves. If `int`, represents the\n absolute number samples to use.\n\n Note that the full dataset is still used to calculate averaged partial\n dependence when `kind='both'`.\n\n .. versionadded:: 0.24\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the randomness of the selected samples when subsamples is not\n `None` and `kind` is either `'both'` or `'individual'`.\n See :term:`Glossary ` for details.\n\n .. versionadded:: 0.24\n\ncentered : bool, default=False\n If `True`, the ICE and PD lines will start at the origin of the y-axis.\n By default, no centering is done.\n\n .. 
versionadded:: 1.1\n\nReturns\n-------\ndisplay : :class:`~sklearn.inspection.PartialDependenceDisplay`\n\nSee Also\n--------\npartial_dependence : Compute Partial Dependence values.\nPartialDependenceDisplay : Partial Dependence visualization.\nPartialDependenceDisplay.from_estimator : Plot Partial Dependence.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_friedman1\n>>> from sklearn.ensemble import GradientBoostingRegressor\n>>> from sklearn.inspection import plot_partial_dependence\n>>> X, y = make_friedman1()\n>>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)\n>>> plot_partial_dependence(clf, X, [0, (0, 1)]) # doctest: +SKIP\n<...>\n>>> plt.show() # doctest: +SKIP" }, { @@ -132130,11 +132130,11 @@ "docstring": { "type": "{'nan', 'clip', 'raise'}", "default_value": "'nan'", - "description": "Handles how `X` values outside of the training domain are handled\nduring prediction.\n\n- 'nan', predictions will be NaN.\n- 'clip', predictions will be set to the value corresponding to\nthe nearest train interval endpoint.\n- 'raise', a `ValueError` is raised." + "description": "Handles how `X` values outside of the training domain are handled\nduring prediction.\n\n- 'nan', predictions will be NaN.\n- 'clip', predictions will be set to the value corresponding to\n the nearest train interval endpoint.\n- 'raise', a `ValueError` is raised." }, "type": { "kind": "EnumType", - "values": ["clip", "raise", "nan"] + "values": ["nan", "clip", "raise"] } } ], @@ -132421,7 +132421,7 @@ "docstring": { "type": "array-like of shape (n_samples,) or (n_samples, 1)", "default_value": "", - "description": "Training data.\n\n.. versionchanged:: 0.24\nAlso accepts 2d array with 1 feature." + "description": "Training data.\n\n.. versionchanged:: 0.24\n Also accepts 2d array with 1 feature." }, "type": { "kind": "NamedType", @@ -132596,7 +132596,7 @@ "docstring": { "type": "array-like of shape (n_samples,) or (n_samples, 1)", "default_value": "", - "description": "Data to transform.\n\n.. versionchanged:: 0.24\nAlso accepts 2d array with 1 feature." + "description": "Data to transform.\n\n.. versionchanged:: 0.24\n Also accepts 2d array with 1 feature." }, "type": { "kind": "NamedType", @@ -133590,7 +133590,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Polynomial kernel approximation via Tensor Sketch.\n\nImplements Tensor Sketch, which approximates the feature map\nof the polynomial kernel::\n\nK(X, Y) = (gamma * + coef0)^degree\n\nby efficiently computing a Count Sketch of the outer product of a\nvector with itself using Fast Fourier Transforms (FFT). Read more in the\n:ref:`User Guide `.\n\n.. versionadded:: 0.24", + "description": "Polynomial kernel approximation via Tensor Sketch.\n\nImplements Tensor Sketch, which approximates the feature map\nof the polynomial kernel::\n\n K(X, Y) = (gamma * + coef0)^degree\n\nby efficiently computing a Count Sketch of the outer product of a\nvector with itself using Fast Fourier Transforms (FFT). Read more in the\n:ref:`User Guide `.\n\n.. 
versionadded:: 0.24", "docstring": "" }, { @@ -134983,7 +134983,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n`normalize` was deprecated in version 1.0 and will be\nremoved in 1.2." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n `normalize` was deprecated in version 1.0 and will be\n removed in 1.2." }, "type": { "kind": "NamedType", @@ -135121,7 +135121,7 @@ "docstring": { "type": "array-like of shape (n_samples,)", "default_value": "None", - "description": "Individual weights for each sample.\n\n.. versionadded:: 0.17\nparameter *sample_weight* support to LinearRegression." + "description": "Individual weights for each sample.\n\n.. versionadded:: 0.17\n parameter *sample_weight* support to LinearRegression." }, "type": { "kind": "NamedType", @@ -135615,7 +135615,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Center and scale data.\n\nCenters data to have mean zero along axis 0. If fit_intercept=False or if\nthe X is a sparse matrix, no centering is done, but normalization can still\nbe applied. The function returns the statistics necessary to reconstruct\nthe input data, which are X_offset, y_offset, X_scale, such that the output\n\nX = (X - X_offset) / X_scale\n\nX_scale is the L2 norm of X - X_offset. If sample_weight is not None,\nthen the weighted mean of X and y is zero, and not the mean itself. If\nfit_intercept=True, the mean, eventually weighted, is returned, independently\nof whether X was centered (option used for optimization with sparse data in\ncoordinate_descend).\n\nThis is here because nearly all linear models will want their data to be\ncentered. This function also systematically makes y consistent with X.dtype", + "description": "Center and scale data.\n\nCenters data to have mean zero along axis 0. If fit_intercept=False or if\nthe X is a sparse matrix, no centering is done, but normalization can still\nbe applied. The function returns the statistics necessary to reconstruct\nthe input data, which are X_offset, y_offset, X_scale, such that the output\n\n X = (X - X_offset) / X_scale\n\nX_scale is the L2 norm of X - X_offset. If sample_weight is not None,\nthen the weighted mean of X and y is zero, and not the mean itself. If\nfit_intercept=True, the mean, eventually weighted, is returned, independently\nof whether X was centered (option used for optimization with sparse data in\ncoordinate_descend).\n\nThis is here because nearly all linear models will want their data to be\ncentered. This function also systematically makes y consistent with X.dtype", "docstring": "Center and scale data.\n\nCenters data to have mean zero along axis 0. If fit_intercept=False or if\nthe X is a sparse matrix, no centering is done, but normalization can still\nbe applied. 
The function returns the statistics necessary to reconstruct\nthe input data, which are X_offset, y_offset, X_scale, such that the output\n\n X = (X - X_offset) / X_scale\n\nX_scale is the L2 norm of X - X_offset. If sample_weight is not None,\nthen the weighted mean of X and y is zero, and not the mean itself. If\nfit_intercept=True, the mean, eventually weighted, is returned, independently\nof whether X was centered (option used for optimization with sparse data in\ncoordinate_descend).\n\nThis is here because nearly all linear models will want their data to be\ncentered. This function also systematically makes y consistent with X.dtype\n\nReturns\n-------\nX_out : {ndarray, sparse matrix} of shape (n_samples, n_features)\n If copy=True a copy of the input X is triggered, otherwise operations are\n inplace.\n If input X is dense, then X_out is centered.\n If normalize is True, then X_out is rescaled (dense and sparse case)\ny_out : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)\n Centered version of y. Likely performed inplace on input y.\nX_offset : ndarray of shape (n_features,)\n The mean per column of input X.\ny_offset : float or ndarray of shape (n_features,)\nX_scale : ndarray of shape (n_features,)\n The standard deviation per column of input X." }, { @@ -135670,7 +135670,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Rescale data sample-wise by square root of sample_weight.\n\nFor many linear models, this enables easy support for sample_weight because\n\n(y - X w)' S (y - X w)\n\nwith S = diag(sample_weight) becomes\n\n||y_rescaled - X_rescaled w||_2^2\n\nwhen setting\n\ny_rescaled = sqrt(S) y\nX_rescaled = sqrt(S) X", + "description": "Rescale data sample-wise by square root of sample_weight.\n\nFor many linear models, this enables easy support for sample_weight because\n\n (y - X w)' S (y - X w)\n\nwith S = diag(sample_weight) becomes\n\n ||y_rescaled - X_rescaled w||_2^2\n\nwhen setting\n\n y_rescaled = sqrt(S) y\n X_rescaled = sqrt(S) X", "docstring": "Rescale data sample-wise by square root of sample_weight.\n\nFor many linear models, this enables easy support for sample_weight because\n\n (y - X w)' S (y - X w)\n\nwith S = diag(sample_weight) becomes\n\n ||y_rescaled - X_rescaled w||_2^2\n\nwhen setting\n\n y_rescaled = sqrt(S) y\n X_rescaled = sqrt(S) X\n\nReturns\n-------\nX_rescaled : {array-like, sparse matrix}\n\ny_rescaled : {array-like, sparse matrix}" }, { @@ -135968,7 +135968,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0 and will be removed in\n1.2." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2." 
}, "type": { "kind": "NamedType", @@ -136445,7 +136445,7 @@ "docstring": { "type": "float", "default_value": "None", - "description": "Initial value for alpha (precision of the noise).\nIf not set, alpha_init is 1/Var(y).\n\n.. versionadded:: 0.22" + "description": "Initial value for alpha (precision of the noise).\nIf not set, alpha_init is 1/Var(y).\n\n .. versionadded:: 0.22" }, "type": { "kind": "NamedType", @@ -136462,7 +136462,7 @@ "docstring": { "type": "float", "default_value": "None", - "description": "Initial value for lambda (precision of the weights).\nIf not set, lambda_init is 1.\n\n.. versionadded:: 0.22" + "description": "Initial value for lambda (precision of the weights).\nIf not set, lambda_init is 1.\n\n .. versionadded:: 0.22" }, "type": { "kind": "NamedType", @@ -136513,7 +136513,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0 and will be removed in\n1.2." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2." }, "type": { "kind": "NamedType", @@ -136850,7 +136850,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Update posterior mean and compute corresponding rmse.\n\nPosterior mean is given by coef_ = scaled_sigma_ * X.T * y where\nscaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)\n+ np.dot(X.T, X))^-1", + "description": "Update posterior mean and compute corresponding rmse.\n\nPosterior mean is given by coef_ = scaled_sigma_ * X.T * y where\nscaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)\n + np.dot(X.T, X))^-1", "docstring": "Update posterior mean and compute corresponding rmse.\n\nPosterior mean is given by coef_ = scaled_sigma_ * X.T * y where\nscaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)\n + np.dot(X.T, X))^-1" }, { @@ -136917,7 +136917,7 @@ "docstring": { "type": "ndarray of shape (n_samples,)", "default_value": "None", - "description": "Individual weights for each sample.\n\n.. versionadded:: 0.20\nparameter *sample_weight* support to BayesianRidge." + "description": "Individual weights for each sample.\n\n.. versionadded:: 0.20\n parameter *sample_weight* support to BayesianRidge." }, "type": { "kind": "NamedType", @@ -137082,7 +137082,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0 and will be removed in\n1.2." 
+ "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2." }, "type": { "kind": "NamedType", @@ -137247,7 +137247,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Linear regression with combined L1 and L2 priors as regularizer.\n\nMinimizes the objective function::\n\n1 / (2 * n_samples) * ||y - Xw||^2_2\n+ alpha * l1_ratio * ||w||_1\n+ 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\na * ||w||_1 + 0.5 * b * ||w||_2^2\n\nwhere::\n\nalpha = a + b and l1_ratio = a / (a + b)\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package while\nalpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n= 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\nunless you supply your own sequence of alpha.\n\nRead more in the :ref:`User Guide `.", + "description": "Linear regression with combined L1 and L2 priors as regularizer.\n\nMinimizes the objective function::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nIf you are interested in controlling the L1 and L2 penalty\nseparately, keep in mind that this is equivalent to::\n\n a * ||w||_1 + 0.5 * b * ||w||_2^2\n\nwhere::\n\n alpha = a + b and l1_ratio = a / (a + b)\n\nThe parameter l1_ratio corresponds to alpha in the glmnet R package while\nalpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio\n= 1 is the lasso penalty. Currently, l1_ratio <= 0.01 is not reliable,\nunless you supply your own sequence of alpha.\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -137576,7 +137576,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0 and will be removed in\n1.2." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2." 
}, "type": { "kind": "NamedType", @@ -137657,7 +137657,7 @@ "docstring": { "type": "int, cross-validation generator or iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- int, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- int, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -137949,7 +137949,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0 and will be removed in\n1.2." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2." }, "type": { "kind": "NamedType", @@ -138114,7 +138114,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Linear Model trained with L1 prior as regularizer (aka the Lasso).\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nTechnically the Lasso model is optimizing the same objective function as\nthe Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\nRead more in the :ref:`User Guide `.", + "description": "Linear Model trained with L1 prior as regularizer (aka the Lasso).\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nTechnically the Lasso model is optimizing the same objective function as\nthe Elastic Net with ``l1_ratio=1.0`` (no L2 penalty).\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -138215,7 +138215,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. 
deprecated:: 1.0\n``normalize`` was deprecated in version 1.0 and will be removed in\n1.2." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2." }, "type": { "kind": "NamedType", @@ -138313,7 +138313,7 @@ "docstring": { "type": "int, cross-validation generator or iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- int, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- int, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -138440,7 +138440,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Lasso linear model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe best model is selected by cross-validation.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.", + "description": "Lasso linear model with iterative fitting along a regularization path.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe best model is selected by cross-validation.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -139074,7 +139074,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0 and will be removed in\n1.2." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. 
deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2." }, "type": { "kind": "NamedType", @@ -139196,7 +139196,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n(1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n+ alpha * l1_ratio * ||W||_21\n+ 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\ni.e. the sum of norms of each row.\n\nRead more in the :ref:`User Guide `.", + "description": "Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = sum_i sqrt(sum_j W_ij ^ 2)\n\ni.e. the sum of norms of each row.\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -139411,7 +139411,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0 and will be removed in\n1.2." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2." }, "type": { "kind": "NamedType", @@ -139462,7 +139462,7 @@ "docstring": { "type": "int, cross-validation generator or iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- int, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- int, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." 
}, "type": { "kind": "UnionType", @@ -139589,7 +139589,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n(1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n+ alpha * l1_ratio * ||W||_21\n+ 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15", + "description": "Multi-task L1/L2 ElasticNet with built-in cross-validation.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskElasticNet is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15", "docstring": "" }, { @@ -139798,7 +139798,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0 and will be removed in\n1.2." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2." }, "type": { "kind": "NamedType", @@ -139920,7 +139920,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.", + "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nThe optimization objective for Lasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -140021,7 +140021,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0 and will be removed in\n1.2." 
+ "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2." }, "type": { "kind": "NamedType", @@ -140089,7 +140089,7 @@ "docstring": { "type": "int, cross-validation generator or iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- int, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- int, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -140199,7 +140199,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskLasso is::\n\n(1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\nWhere::\n\n||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15", + "description": "Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\nSee glossary entry for :term:`cross-validation estimator`.\n\nThe optimization objective for MultiTaskLasso is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.15", "docstring": "" }, { @@ -140488,7 +140488,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0 and will be removed in\n1.2." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. 
deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and will be removed in\n 1.2." }, "type": { "kind": "NamedType", @@ -140738,7 +140738,7 @@ }, "type": { "kind": "EnumType", - "values": ["C", "F"] + "values": ["F", "C"] } }, { @@ -140828,7 +140828,7 @@ }, "type": { "kind": "EnumType", - "values": ["C", "F"] + "values": ["F", "C"] } } ], @@ -141143,7 +141143,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.linear_model"], - "description": "Compute elastic net path with coordinate descent.\n\nThe elastic net optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n1 / (2 * n_samples) * ||y - Xw||^2_2\n+ alpha * l1_ratio * ||w||_1\n+ 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nFor multi-output tasks it is::\n\n(1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n+ alpha * l1_ratio * ||W||_21\n+ 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.", + "description": "Compute elastic net path with coordinate descent.\n\nThe elastic net optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nFor multi-output tasks it is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.", "docstring": "Compute elastic net path with coordinate descent.\n\nThe elastic net optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n 1 / (2 * n_samples) * ||y - Xw||^2_2\n + alpha * l1_ratio * ||w||_1\n + 0.5 * alpha * (1 - l1_ratio) * ||w||^2_2\n\nFor multi-output tasks it is::\n\n (1 / (2 * n_samples)) * ||Y - XW||_Fro^2\n + alpha * l1_ratio * ||W||_21\n + 0.5 * alpha * (1 - l1_ratio) * ||W||_Fro^2\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data to avoid\n unnecessary memory duplication. If ``y`` is mono-output then ``X``\n can be sparse.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\nl1_ratio : float, default=0.5\n Number between 0 and 1 passed to elastic net (scaling between\n l1 and l2 penalties). ``l1_ratio=1`` corresponds to the Lasso.\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If None alphas are set automatically.\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nXy : array-like of shape (n_features,) or (n_features, n_targets), default=None\n Xy = np.dot(X.T, y) that can be precomputed. 
It is useful\n only when the Gram matrix is precomputed.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncoef_init : ndarray of shape (n_features, ), default=None\n The initial values of the coefficients.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nreturn_n_iter : bool, default=False\n Whether to return the number of iterations or not.\n\npositive : bool, default=False\n If set to True, forces coefficients to be positive.\n (Only allowed when ``y.ndim == 1``).\n\ncheck_input : bool, default=True\n If set to False, the input validation checks are skipped (including the\n Gram matrix when provided). It is assumed that they are handled\n by the caller.\n\n**params : kwargs\n Keyword arguments passed to the coordinate descent solver.\n\nReturns\n-------\nalphas : ndarray of shape (n_alphas,)\n The alphas along the path where models are computed.\n\ncoefs : ndarray of shape (n_features, n_alphas) or (n_targets, n_features, n_alphas)\n Coefficients along the path.\n\ndual_gaps : ndarray of shape (n_alphas,)\n The dual gaps at the end of the optimization for each alpha.\n\nn_iters : list of int\n The number of iterations taken by the coordinate descent optimizer to\n reach the specified tolerance for each alpha.\n (Is returned when ``return_n_iter`` is set to True).\n\nSee Also\n--------\nMultiTaskElasticNet : Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer.\nMultiTaskElasticNetCV : Multi-task L1/L2 ElasticNet with built-in cross-validation.\nElasticNet : Linear regression with combined L1 and L2 priors as regularizer.\nElasticNetCV : Elastic Net model with iterative fitting along a regularization path.\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n`." }, { @@ -141417,7 +141417,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.linear_model"], - "description": "Compute Lasso path with coordinate descent.\n\nThe Lasso optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nFor multi-output tasks it is::\n\n(1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.", + "description": "Compute Lasso path with coordinate descent.\n\nThe Lasso optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nFor multi-output tasks it is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.", "docstring": "Compute Lasso path with coordinate descent.\n\nThe Lasso optimization function varies for mono and multi-outputs.\n\nFor mono-output tasks it is::\n\n (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nFor multi-output tasks it is::\n\n (1 / (2 * n_samples)) * ||Y - XW||^2_Fro + alpha * ||W||_21\n\nWhere::\n\n ||W||_21 = \\sum_i \\sqrt{\\sum_j w_{ij}^2}\n\ni.e. the sum of norm of each row.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Training data. Pass directly as Fortran-contiguous data to avoid\n unnecessary memory duplication. 
If ``y`` is mono-output then ``X``\n can be sparse.\n\ny : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)\n Target values.\n\neps : float, default=1e-3\n Length of the path. ``eps=1e-3`` means that\n ``alpha_min / alpha_max = 1e-3``.\n\nn_alphas : int, default=100\n Number of alphas along the regularization path.\n\nalphas : ndarray, default=None\n List of alphas where to compute the models.\n If ``None`` alphas are set automatically.\n\nprecompute : 'auto', bool or array-like of shape (n_features, n_features), default='auto'\n Whether to use a precomputed Gram matrix to speed up\n calculations. If set to ``'auto'`` let us decide. The Gram\n matrix can also be passed as argument.\n\nXy : array-like of shape (n_features,) or (n_features, n_targets), default=None\n Xy = np.dot(X.T, y) that can be precomputed. It is useful\n only when the Gram matrix is precomputed.\n\ncopy_X : bool, default=True\n If ``True``, X will be copied; else, it may be overwritten.\n\ncoef_init : ndarray of shape (n_features, ), default=None\n The initial values of the coefficients.\n\nverbose : bool or int, default=False\n Amount of verbosity.\n\nreturn_n_iter : bool, default=False\n Whether to return the number of iterations or not.\n\npositive : bool, default=False\n If set to True, forces coefficients to be positive.\n (Only allowed when ``y.ndim == 1``).\n\n**params : kwargs\n Keyword arguments passed to the coordinate descent solver.\n\nReturns\n-------\nalphas : ndarray of shape (n_alphas,)\n The alphas along the path where models are computed.\n\ncoefs : ndarray of shape (n_features, n_alphas) or (n_targets, n_features, n_alphas)\n Coefficients along the path.\n\ndual_gaps : ndarray of shape (n_alphas,)\n The dual gaps at the end of the optimization for each alpha.\n\nn_iters : list of int\n The number of iterations taken by the coordinate descent optimizer to\n reach the specified tolerance for each alpha.\n\nSee Also\n--------\nlars_path : Compute Least Angle Regression or Lasso path using LARS\n algorithm.\nLasso : The Lasso is a linear model that estimates sparse coefficients.\nLassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.\nLassoCV : Lasso linear model with iterative fitting along a regularization\n path.\nLassoLarsCV : Cross-validated Lasso using the LARS algorithm.\nsklearn.decomposition.sparse_encode : Estimator that can be used to\n transform signals into sparse linear combination of atoms from a fixed.\n\nNotes\n-----\nFor an example, see\n:ref:`examples/linear_model/plot_lasso_coordinate_descent_path.py\n`.\n\nTo avoid unnecessary memory duplication the X argument of the fit method\nshould be directly passed as a Fortran-contiguous numpy array.\n\nNote that in certain cases, the Lars solver may be significantly\nfaster to implement this functionality. In particular, linear\ninterpolation can be used to retrieve model coefficients between the\nvalues output by lars_path\n\nExamples\n--------\n\nComparing lasso_path and lars_path with interpolation:\n\n>>> import numpy as np\n>>> from sklearn.linear_model import lasso_path\n>>> X = np.array([[1, 2, 3.1], [2.3, 5.4, 4.3]]).T\n>>> y = np.array([1, 2, 3.1])\n>>> # Use lasso_path to compute a coefficient path\n>>> _, coef_path, _ = lasso_path(X, y, alphas=[5., 1., .5])\n>>> print(coef_path)\n[[0. 0. 
0.46874778]\n [0.2159048 0.4425765 0.23689075]]\n\n>>> # Now use lars_path and 1D linear interpolation to compute the\n>>> # same path\n>>> from sklearn.linear_model import lars_path\n>>> alphas, active, coef_path_lars = lars_path(X, y, method='lasso')\n>>> from scipy import interpolate\n>>> coef_path_continuous = interpolate.interp1d(alphas[::-1],\n... coef_path_lars[:, ::-1])\n>>> print(coef_path_continuous([5., 1., .5]))\n[[0. 0. 0.46915237]\n [0.2159048 0.4425765 0.23668876]]" }, { @@ -141813,11 +141813,11 @@ "docstring": { "type": "{'auto', 'identity', 'log'}", "default_value": "'auto'", - "description": "The link function of the GLM, i.e. mapping from linear predictor\n`X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets\nthe link depending on the chosen `power` parameter as follows:\n\n- 'identity' for ``power <= 0``, e.g. for the Normal distribution\n- 'log' for ``power > 0``, e.g. for Poisson, Gamma and Inverse Gaussian\ndistributions" + "description": "The link function of the GLM, i.e. mapping from linear predictor\n`X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets\nthe link depending on the chosen `power` parameter as follows:\n\n- 'identity' for ``power <= 0``, e.g. for the Normal distribution\n- 'log' for ``power > 0``, e.g. for Poisson, Gamma and Inverse Gaussian\n distributions" }, "type": { "kind": "EnumType", - "values": ["log", "auto", "identity"] + "values": ["log", "identity", "auto"] } }, { @@ -141986,7 +141986,7 @@ "docstring": { "type": "'lbfgs'", "default_value": "'lbfgs'", - "description": "Algorithm to use in the optimization problem:\n\n'lbfgs'\nCalls scipy's L-BFGS-B optimizer." + "description": "Algorithm to use in the optimization problem:\n\n'lbfgs'\n Calls scipy's L-BFGS-B optimizer." }, "type": { "kind": "NamedType", @@ -142065,7 +142065,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and\npredicting the mean of the target y as y_pred=h(X*w) with coefficients w.\nTherefore, the fit minimizes the following objective function with L2 priors as\nregularizer::\n\n1/(2*sum(s_i)) * sum(s_i * deviance(y_i, h(x_i*w)) + 1/2 * alpha * ||w||_2^2\n\nwith inverse link function h, s=sample_weight and per observation (unit) deviance\ndeviance(y_i, h(x_i*w)). Note that for an EDM, 1/2 * deviance is the negative\nlog-likelihood up to a constant (in w) term.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nInstead of implementing the EDM family and a link function separately, we directly\nuse the loss functions `from sklearn._loss` which have the link functions included\nin them for performance reasons. We pick the loss functions that implement\n(1/2 times) EDM deviances.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23", + "description": "Regression via a penalized Generalized Linear Model (GLM).\n\nGLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and\npredicting the mean of the target y as y_pred=h(X*w) with coefficients w.\nTherefore, the fit minimizes the following objective function with L2 priors as\nregularizer::\n\n 1/(2*sum(s_i)) * sum(s_i * deviance(y_i, h(x_i*w)) + 1/2 * alpha * ||w||_2^2\n\nwith inverse link function h, s=sample_weight and per observation (unit) deviance\ndeviance(y_i, h(x_i*w)). 
Note that for an EDM, 1/2 * deviance is the negative\nlog-likelihood up to a constant (in w) term.\nThe parameter ``alpha`` corresponds to the lambda parameter in glmnet.\n\nInstead of implementing the EDM family and a link function separately, we directly\nuse the loss functions `from sklearn._loss` which have the link functions included\nin them for performance reasons. We pick the loss functions that implement\n(1/2 times) EDM deviances.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.23", "docstring": "" }, { @@ -142872,7 +142872,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0. It will default\nto False in 1.2 and be removed in 1.4." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0. It will default\n to False in 1.2 and be removed in 1.4." }, "type": { "kind": "NamedType", @@ -143379,7 +143379,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0. It will default\nto False in 1.2 and be removed in 1.4." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0. It will default\n to False in 1.2 and be removed in 1.4." }, "type": { "kind": "NamedType", @@ -143426,7 +143426,7 @@ "docstring": { "type": "int, cross-validation generator or an iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." 
+ "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -143708,7 +143708,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0. It will default\nto False in 1.2 and be removed in 1.4." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0. It will default\n to False in 1.2 and be removed in 1.4." }, "type": { "kind": "NamedType", @@ -143974,7 +143974,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0. It will default\nto False in 1.2 and be removed in 1.4." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0. It will default\n to False in 1.2 and be removed in 1.4." }, "type": { "kind": "NamedType", @@ -144017,7 +144017,7 @@ "docstring": { "type": "int, cross-validation generator or an iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." 
+ "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -144228,7 +144228,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0. It will default\nto False in 1.2 and be removed in 1.4." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0. It will default\n to False in 1.2 and be removed in 1.4." }, "type": { "kind": "NamedType", @@ -144772,7 +144772,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0. It will default\nto False in 1.2 and be removed in 1.4." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0. It will default\n to False in 1.2 and be removed in 1.4." 
}, "type": { "kind": "NamedType", @@ -145728,7 +145728,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "General class for loss functions with raw_prediction = X @ coef + intercept.\n\nNote that raw_prediction is also known as linear predictor.\n\nThe loss is the sum of per sample losses and includes a term for L2\nregularization::\n\nloss = sum_i s_i loss(y_i, X_i @ coef + intercept)\n+ 1/2 * l2_reg_strength * ||coef||_2^2\n\nwith sample weights s_i=1 if sample_weight=None.\n\nGradient and hessian, for simplicity without intercept, are::\n\ngradient = X.T @ loss.gradient + l2_reg_strength * coef\nhessian = X.T @ diag(loss.hessian) @ X + l2_reg_strength * identity\n\nConventions:\nif fit_intercept:\nn_dof = n_features + 1\nelse:\nn_dof = n_features\n\nif base_loss.is_multiclass:\ncoef.shape = (n_classes, n_dof) or ravelled (n_classes * n_dof,)\nelse:\ncoef.shape = (n_dof,)\n\nThe intercept term is at the end of the coef array:\nif base_loss.is_multiclass:\nif coef.shape (n_classes, n_dof):\nintercept = coef[:, -1]\nif coef.shape (n_classes * n_dof,)\nintercept = coef[n_features::n_dof] = coef[(n_dof-1)::n_dof]\nintercept.shape = (n_classes,)\nelse:\nintercept = coef[-1]\n\nNote: If coef has shape (n_classes * n_dof,), the 2d-array can be reconstructed as\n\ncoef.reshape((n_classes, -1), order=\"F\")\n\nThe option order=\"F\" makes coef[:, i] contiguous. This, in turn, makes the\ncoefficients without intercept, coef[:, :-1], contiguous and speeds up\nmatrix-vector computations.\n\nNote: If the average loss per sample is wanted instead of the sum of the loss per\nsample, one can simply use a rescaled sample_weight such that\nsum(sample_weight) = 1.", + "description": "General class for loss functions with raw_prediction = X @ coef + intercept.\n\nNote that raw_prediction is also known as linear predictor.\n\nThe loss is the sum of per sample losses and includes a term for L2\nregularization::\n\n loss = sum_i s_i loss(y_i, X_i @ coef + intercept)\n + 1/2 * l2_reg_strength * ||coef||_2^2\n\nwith sample weights s_i=1 if sample_weight=None.\n\nGradient and hessian, for simplicity without intercept, are::\n\n gradient = X.T @ loss.gradient + l2_reg_strength * coef\n hessian = X.T @ diag(loss.hessian) @ X + l2_reg_strength * identity\n\nConventions:\n if fit_intercept:\n n_dof = n_features + 1\n else:\n n_dof = n_features\n\n if base_loss.is_multiclass:\n coef.shape = (n_classes, n_dof) or ravelled (n_classes * n_dof,)\n else:\n coef.shape = (n_dof,)\n\n The intercept term is at the end of the coef array:\n if base_loss.is_multiclass:\n if coef.shape (n_classes, n_dof):\n intercept = coef[:, -1]\n if coef.shape (n_classes * n_dof,)\n intercept = coef[n_features::n_dof] = coef[(n_dof-1)::n_dof]\n intercept.shape = (n_classes,)\n else:\n intercept = coef[-1]\n\nNote: If coef has shape (n_classes * n_dof,), the 2d-array can be reconstructed as\n\n coef.reshape((n_classes, -1), order=\"F\")\n\nThe option order=\"F\" makes coef[:, i] contiguous. 
This, in turn, makes the\ncoefficients without intercept, coef[:, :-1], contiguous and speeds up\nmatrix-vector computations.\n\nNote: If the average loss per sample is wanted instead of the sum of the loss per\nsample, one can simply use a rescaled sample_weight such that\nsum(sample_weight) = 1.", "docstring": "" }, { @@ -146464,11 +146464,11 @@ "docstring": { "type": "{'l1', 'l2', 'elasticnet', 'none'}", "default_value": "'l2'", - "description": "Specify the norm of the penalty:\n\n- `'none'`: no penalty is added;\n- `'l2'`: add a L2 penalty term and it is the default choice;\n- `'l1'`: add a L1 penalty term;\n- `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n.. warning::\nSome penalties may not work with some solvers. See the parameter\n`solver` below, to know the compatibility between the penalty and\nsolver.\n\n.. versionadded:: 0.19\nl1 penalty with SAGA solver (allowing 'multinomial' + L1)" + "description": "Specify the norm of the penalty:\n\n- `'none'`: no penalty is added;\n- `'l2'`: add a L2 penalty term and it is the default choice;\n- `'l1'`: add a L1 penalty term;\n- `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n.. warning::\n Some penalties may not work with some solvers. See the parameter\n `solver` below, to know the compatibility between the penalty and\n solver.\n\n.. versionadded:: 0.19\n l1 penalty with SAGA solver (allowing 'multinomial' + L1)" }, "type": { "kind": "EnumType", - "values": ["l2", "elasticnet", "none", "l1"] + "values": ["none", "l1", "elasticnet", "l2"] } }, { @@ -146566,7 +146566,7 @@ "docstring": { "type": "dict or 'balanced'", "default_value": "None", - "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified.\n\n.. versionadded:: 0.17\n*class_weight='balanced'*" + "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified.\n\n.. versionadded:: 0.17\n *class_weight='balanced'*" }, "type": { "kind": "UnionType", @@ -146618,11 +146618,11 @@ "docstring": { "type": "{'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}", "default_value": "'lbfgs'", - "description": "Algorithm to use in the optimization problem. Default is 'lbfgs'.\nTo choose a solver, you might want to consider the following aspects:\n\n- For small datasets, 'liblinear' is a good choice, whereas 'sag'\nand 'saga' are faster for large ones;\n- For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n'lbfgs' handle multinomial loss;\n- 'liblinear' is limited to one-versus-rest schemes.\n\n.. warning::\nThe choice of the algorithm depends on the penalty chosen:\nSupported penalties by solver:\n\n- 'newton-cg' - ['l2', 'none']\n- 'lbfgs' - ['l2', 'none']\n- 'liblinear' - ['l1', 'l2']\n- 'sag' - ['l2', 'none']\n- 'saga' - ['elasticnet', 'l1', 'l2', 'none']\n\n.. 
note::\n'sag' and 'saga' fast convergence is only guaranteed on\nfeatures with approximately the same scale. You can\npreprocess the data with a scaler from :mod:`sklearn.preprocessing`.\n\n.. seealso::\nRefer to the User Guide for more information regarding\n:class:`LogisticRegression` and more specifically the\n`Table `_\nsummarazing solver/penalty supports.\n\n.. versionadded:: 0.17\nStochastic Average Gradient descent solver.\n.. versionadded:: 0.19\nSAGA solver.\n.. versionchanged:: 0.22\nThe default solver changed from 'liblinear' to 'lbfgs' in 0.22." + "description": "Algorithm to use in the optimization problem. Default is 'lbfgs'.\nTo choose a solver, you might want to consider the following aspects:\n\n - For small datasets, 'liblinear' is a good choice, whereas 'sag'\n and 'saga' are faster for large ones;\n - For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n 'lbfgs' handle multinomial loss;\n - 'liblinear' is limited to one-versus-rest schemes.\n\n.. warning::\n The choice of the algorithm depends on the penalty chosen:\n Supported penalties by solver:\n\n - 'newton-cg' - ['l2', 'none']\n - 'lbfgs' - ['l2', 'none']\n - 'liblinear' - ['l1', 'l2']\n - 'sag' - ['l2', 'none']\n - 'saga' - ['elasticnet', 'l1', 'l2', 'none']\n\n.. note::\n 'sag' and 'saga' fast convergence is only guaranteed on\n features with approximately the same scale. You can\n preprocess the data with a scaler from :mod:`sklearn.preprocessing`.\n\n.. seealso::\n Refer to the User Guide for more information regarding\n :class:`LogisticRegression` and more specifically the\n `Table `_\n summarazing solver/penalty supports.\n\n.. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n.. versionadded:: 0.19\n SAGA solver.\n.. versionchanged:: 0.22\n The default solver changed from 'liblinear' to 'lbfgs' in 0.22." }, "type": { "kind": "EnumType", - "values": ["newton-cg", "liblinear", "sag", "saga", "lbfgs"] + "values": ["saga", "sag", "liblinear", "newton-cg", "lbfgs"] } }, { @@ -146652,11 +146652,11 @@ "docstring": { "type": "{'auto', 'ovr', 'multinomial'}", "default_value": "'auto'", - "description": "If the option chosen is 'ovr', then a binary problem is fit for each\nlabel. For 'multinomial' the loss minimised is the multinomial loss fit\nacross the entire probability distribution, *even when the data is\nbinary*. 'multinomial' is unavailable when solver='liblinear'.\n'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\nand otherwise selects 'multinomial'.\n\n.. versionadded:: 0.18\nStochastic Average Gradient descent solver for 'multinomial' case.\n.. versionchanged:: 0.22\nDefault changed from 'ovr' to 'auto' in 0.22." + "description": "If the option chosen is 'ovr', then a binary problem is fit for each\nlabel. For 'multinomial' the loss minimised is the multinomial loss fit\nacross the entire probability distribution, *even when the data is\nbinary*. 'multinomial' is unavailable when solver='liblinear'.\n'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\nand otherwise selects 'multinomial'.\n\n.. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n.. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22." 
}, "type": { "kind": "EnumType", - "values": ["ovr", "multinomial", "auto"] + "values": ["multinomial", "ovr", "auto"] } }, { @@ -146686,7 +146686,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "When set to True, reuse the solution of the previous call to fit as\ninitialization, otherwise, just erase the previous solution.\nUseless for liblinear solver. See :term:`the Glossary `.\n\n.. versionadded:: 0.17\n*warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers." + "description": "When set to True, reuse the solution of the previous call to fit as\ninitialization, otherwise, just erase the previous solution.\nUseless for liblinear solver. See :term:`the Glossary `.\n\n.. versionadded:: 0.17\n *warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers." }, "type": { "kind": "NamedType", @@ -146807,7 +146807,7 @@ "docstring": { "type": "array-like of shape (n_samples,) default=None", "default_value": "", - "description": "Array of weights that are assigned to individual samples.\nIf not provided, then each sample is given unit weight.\n\n.. versionadded:: 0.17\n*sample_weight* support to LogisticRegression." + "description": "Array of weights that are assigned to individual samples.\nIf not provided, then each sample is given unit weight.\n\n.. versionadded:: 0.17\n *sample_weight* support to LogisticRegression." }, "type": { "kind": "NamedType", @@ -146982,7 +146982,7 @@ "docstring": { "type": "int or cross-validation generator", "default_value": "None", - "description": "The default cross-validation generator used is Stratified K-Folds.\nIf an integer is provided, then it is the number of folds used.\nSee the module :mod:`sklearn.model_selection` module for the\nlist of possible cross-validation objects.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." + "description": "The default cross-validation generator used is Stratified K-Folds.\nIf an integer is provided, then it is the number of folds used.\nSee the module :mod:`sklearn.model_selection` module for the\nlist of possible cross-validation objects.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -147025,11 +147025,11 @@ "docstring": { "type": "{'l1', 'l2', 'elasticnet'}", "default_value": "'l2'", - "description": "Specify the norm of the penalty:\n\n- `'l2'`: add a L2 penalty term (used by default);\n- `'l1'`: add a L1 penalty term;\n- `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n.. warning::\nSome penalties may not work with some solvers. See the parameter\n`solver` below, to know the compatibility between the penalty and\nsolver." + "description": "Specify the norm of the penalty:\n\n- `'l2'`: add a L2 penalty term (used by default);\n- `'l1'`: add a L1 penalty term;\n- `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n.. warning::\n Some penalties may not work with some solvers. See the parameter\n `solver` below, to know the compatibility between the penalty and\n solver." }, "type": { "kind": "EnumType", - "values": ["l2", "elasticnet", "l1"] + "values": ["l1", "elasticnet", "l2"] } }, { @@ -147068,11 +147068,11 @@ "docstring": { "type": "{'newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'}", "default_value": "'lbfgs'", - "description": "Algorithm to use in the optimization problem. 
Default is 'lbfgs'.\nTo choose a solver, you might want to consider the following aspects:\n\n- For small datasets, 'liblinear' is a good choice, whereas 'sag'\nand 'saga' are faster for large ones;\n- For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n'lbfgs' handle multinomial loss;\n- 'liblinear' might be slower in :class:`LogisticRegressionCV`\nbecause it does not handle warm-starting. 'liblinear' is\nlimited to one-versus-rest schemes.\n\n.. warning::\nThe choice of the algorithm depends on the penalty chosen:\n\n- 'newton-cg' - ['l2']\n- 'lbfgs' - ['l2']\n- 'liblinear' - ['l1', 'l2']\n- 'sag' - ['l2']\n- 'saga' - ['elasticnet', 'l1', 'l2']\n\n.. note::\n'sag' and 'saga' fast convergence is only guaranteed on features\nwith approximately the same scale. You can preprocess the data with\na scaler from :mod:`sklearn.preprocessing`.\n\n.. versionadded:: 0.17\nStochastic Average Gradient descent solver.\n.. versionadded:: 0.19\nSAGA solver." + "description": "Algorithm to use in the optimization problem. Default is 'lbfgs'.\nTo choose a solver, you might want to consider the following aspects:\n\n - For small datasets, 'liblinear' is a good choice, whereas 'sag'\n and 'saga' are faster for large ones;\n - For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n 'lbfgs' handle multinomial loss;\n - 'liblinear' might be slower in :class:`LogisticRegressionCV`\n because it does not handle warm-starting. 'liblinear' is\n limited to one-versus-rest schemes.\n\n.. warning::\n The choice of the algorithm depends on the penalty chosen:\n\n - 'newton-cg' - ['l2']\n - 'lbfgs' - ['l2']\n - 'liblinear' - ['l1', 'l2']\n - 'sag' - ['l2']\n - 'saga' - ['elasticnet', 'l1', 'l2']\n\n.. note::\n 'sag' and 'saga' fast convergence is only guaranteed on features\n with approximately the same scale. You can preprocess the data with\n a scaler from :mod:`sklearn.preprocessing`.\n\n.. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n.. versionadded:: 0.19\n SAGA solver." }, "type": { "kind": "EnumType", - "values": ["newton-cg", "liblinear", "sag", "saga", "lbfgs"] + "values": ["saga", "sag", "liblinear", "newton-cg", "lbfgs"] } }, { @@ -147119,7 +147119,7 @@ "docstring": { "type": "dict or 'balanced'", "default_value": "None", - "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified.\n\n.. versionadded:: 0.17\nclass_weight == 'balanced'" + "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified.\n\n.. 
versionadded:: 0.17\n class_weight == 'balanced'" }, "type": { "kind": "UnionType", @@ -147213,7 +147213,7 @@ "docstring": { "type": "{'auto, 'ovr', 'multinomial'}", "default_value": "'auto'", - "description": "If the option chosen is 'ovr', then a binary problem is fit for each\nlabel. For 'multinomial' the loss minimised is the multinomial loss fit\nacross the entire probability distribution, *even when the data is\nbinary*. 'multinomial' is unavailable when solver='liblinear'.\n'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\nand otherwise selects 'multinomial'.\n\n.. versionadded:: 0.18\nStochastic Average Gradient descent solver for 'multinomial' case.\n.. versionchanged:: 0.22\nDefault changed from 'ovr' to 'auto' in 0.22." + "description": "If the option chosen is 'ovr', then a binary problem is fit for each\nlabel. For 'multinomial' the loss minimised is the multinomial loss fit\nacross the entire probability distribution, *even when the data is\nbinary*. 'multinomial' is unavailable when solver='liblinear'.\n'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\nand otherwise selects 'multinomial'.\n\n.. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n.. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22." }, "type": { "kind": "EnumType", @@ -147823,7 +147823,7 @@ }, "type": { "kind": "EnumType", - "values": ["newton-cg", "liblinear", "sag", "saga", "lbfgs"] + "values": ["saga", "sag", "liblinear", "newton-cg", "lbfgs"] } }, { @@ -147840,7 +147840,7 @@ }, "type": { "kind": "EnumType", - "values": ["l2", "elasticnet", "l1"] + "values": ["l1", "elasticnet", "l2"] } }, { @@ -147891,7 +147891,7 @@ }, "type": { "kind": "EnumType", - "values": ["ovr", "multinomial", "auto"] + "values": ["multinomial", "ovr", "auto"] } }, { @@ -148152,7 +148152,7 @@ }, "type": { "kind": "EnumType", - "values": ["newton-cg", "liblinear", "sag", "saga", "lbfgs"] + "values": ["saga", "sag", "liblinear", "newton-cg", "lbfgs"] } }, { @@ -148229,7 +148229,7 @@ }, "type": { "kind": "EnumType", - "values": ["l2", "elasticnet", "l1"] + "values": ["l1", "elasticnet", "l2"] } }, { @@ -148259,11 +148259,11 @@ "docstring": { "type": "{'ovr', 'multinomial', 'auto'}", "default_value": "'auto'", - "description": "If the option chosen is 'ovr', then a binary problem is fit for each\nlabel. For 'multinomial' the loss minimised is the multinomial loss fit\nacross the entire probability distribution, *even when the data is\nbinary*. 'multinomial' is unavailable when solver='liblinear'.\n'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\nand otherwise selects 'multinomial'.\n\n.. versionadded:: 0.18\nStochastic Average Gradient descent solver for 'multinomial' case.\n.. versionchanged:: 0.22\nDefault changed from 'ovr' to 'auto' in 0.22." + "description": "If the option chosen is 'ovr', then a binary problem is fit for each\nlabel. For 'multinomial' the loss minimised is the multinomial loss fit\nacross the entire probability distribution, *even when the data is\nbinary*. 'multinomial' is unavailable when solver='liblinear'.\n'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\nand otherwise selects 'multinomial'.\n\n.. versionadded:: 0.18\n Stochastic Average Gradient descent solver for 'multinomial' case.\n.. versionchanged:: 0.22\n Default changed from 'ovr' to 'auto' in 0.22." 
}, "type": { "kind": "EnumType", - "values": ["ovr", "multinomial", "auto"] + "values": ["multinomial", "ovr", "auto"] } }, { @@ -148465,7 +148465,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0. It will default\nto False in 1.2 and be removed in 1.4." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0. It will default\n to False in 1.2 and be removed in 1.4." }, "type": { "kind": "NamedType", @@ -148630,7 +148630,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0. It will default\nto False in 1.2 and be removed in 1.4." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0. It will default\n to False in 1.2 and be removed in 1.4." }, "type": { "kind": "NamedType", @@ -148664,7 +148664,7 @@ "docstring": { "type": "int, cross-validation generator or iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." 
}, "type": { "kind": "UnionType", @@ -149177,7 +149177,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0. It will default\nto False in 1.2 and be removed in 1.4." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0. It will default\n to False in 1.2 and be removed in 1.4." }, "type": { "kind": "NamedType", @@ -149810,7 +149810,7 @@ "docstring": { "type": "dict, {class_label: weight} or \"balanced\" or None", "default_value": "None", - "description": "Preset for the class_weight fit parameter.\n\nWeights associated with classes. If not given, all classes\nare supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``.\n\n.. versionadded:: 0.17\nparameter *class_weight* to automatically weight samples." + "description": "Preset for the class_weight fit parameter.\n\nWeights associated with classes. If not given, all classes\nare supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``.\n\n.. versionadded:: 0.17\n parameter *class_weight* to automatically weight samples." }, "type": { "kind": "UnionType", @@ -149844,7 +149844,7 @@ "docstring": { "type": "bool or int", "default_value": "False", - "description": "When set to True, computes the averaged SGD weights and stores the\nresult in the ``coef_`` attribute. If set to an int greater than 1,\naveraging will begin once the total number of samples seen reaches\naverage. So average=10 will begin averaging after seeing 10 samples.\n\n.. versionadded:: 0.19\nparameter *average* to use weights averaging in SGD." + "description": "When set to True, computes the averaged SGD weights and stores the\nresult in the ``coef_`` attribute. If set to an int greater than 1,\naveraging will begin once the total number of samples seen reaches\naverage. So average=10 will begin averaging after seeing 10 samples.\n\n.. versionadded:: 0.19\n parameter *average* to use weights averaging in SGD." }, "type": { "kind": "UnionType", @@ -150327,7 +150327,7 @@ "docstring": { "type": "bool or int", "default_value": "False", - "description": "When set to True, computes the averaged SGD weights and stores the\nresult in the ``coef_`` attribute. If set to an int greater than 1,\naveraging will begin once the total number of samples seen reaches\naverage. So average=10 will begin averaging after seeing 10 samples.\n\n.. versionadded:: 0.19\nparameter *average* to use weights averaging in SGD." 
+ "description": "When set to True, computes the averaged SGD weights and stores the\nresult in the ``coef_`` attribute. If set to an int greater than 1,\naveraging will begin once the total number of samples seen reaches\naverage. So average=10 will begin averaging after seeing 10 samples.\n\n.. versionadded:: 0.19\n parameter *average* to use weights averaging in SGD." }, "type": { "kind": "UnionType", @@ -150576,7 +150576,7 @@ }, "type": { "kind": "EnumType", - "values": ["l2", "elasticnet", "l1"] + "values": ["l1", "elasticnet", "l2"] } }, { @@ -150952,7 +150952,7 @@ }, "type": { "kind": "EnumType", - "values": ["revised simplex", "highs-ipm", "highs-ds", "highs", "interior-point"] + "values": ["revised simplex", "highs-ds", "interior-point", "highs", "highs-ipm"] } }, { @@ -151096,7 +151096,7 @@ "docstring": { "type": "object", "default_value": "None", - "description": "Base estimator object which implements the following methods:\n\n* `fit(X, y)`: Fit model to given training data and target values.\n* `score(X, y)`: Returns the mean accuracy on the given test data,\nwhich is used for the stop criterion defined by `stop_score`.\nAdditionally, the score is used to decide which of two equally\nlarge consensus sets is chosen as the better one.\n* `predict(X)`: Returns predicted values using the linear model,\nwhich is used to compute residual error using loss function.\n\nIf `estimator` is None, then\n:class:`~sklearn.linear_model.LinearRegression` is used for\ntarget values of dtype float.\n\nNote that the current implementation only supports regression\nestimators." + "description": "Base estimator object which implements the following methods:\n\n * `fit(X, y)`: Fit model to given training data and target values.\n * `score(X, y)`: Returns the mean accuracy on the given test data,\n which is used for the stop criterion defined by `stop_score`.\n Additionally, the score is used to decide which of two equally\n large consensus sets is chosen as the better one.\n * `predict(X)`: Returns predicted values using the linear model,\n which is used to compute residual error using loss function.\n\nIf `estimator` is None, then\n:class:`~sklearn.linear_model.LinearRegression` is used for\ntarget values of dtype float.\n\nNote that the current implementation only supports regression\nestimators." }, "type": { "kind": "NamedType", @@ -151113,7 +151113,7 @@ "docstring": { "type": "int (>= 1) or float ([0, 1])", "default_value": "None", - "description": "Minimum number of samples chosen randomly from original data. Treated\nas an absolute number of samples for `min_samples >= 1`, treated as a\nrelative number `ceil(min_samples * X.shape[0])` for\n`min_samples < 1`. This is typically chosen as the minimal number of\nsamples necessary to estimate the given `estimator`. By default a\n``sklearn.linear_model.LinearRegression()`` estimator is assumed and\n`min_samples` is chosen as ``X.shape[1] + 1``. This parameter is highly\ndependent upon the model, so if a `estimator` other than\n:class:`linear_model.LinearRegression` is used, the user is\nencouraged to provide a value.\n\n.. deprecated:: 1.0\nNot setting `min_samples` explicitly will raise an error in version\n1.2 for models other than\n:class:`~sklearn.linear_model.LinearRegression`. To keep the old\ndefault behavior, set `min_samples=X.shape[1] + 1` explicitly." + "description": "Minimum number of samples chosen randomly from original data. 
Treated\nas an absolute number of samples for `min_samples >= 1`, treated as a\nrelative number `ceil(min_samples * X.shape[0])` for\n`min_samples < 1`. This is typically chosen as the minimal number of\nsamples necessary to estimate the given `estimator`. By default a\n``sklearn.linear_model.LinearRegression()`` estimator is assumed and\n`min_samples` is chosen as ``X.shape[1] + 1``. This parameter is highly\ndependent upon the model, so if a `estimator` other than\n:class:`linear_model.LinearRegression` is used, the user is\nencouraged to provide a value.\n\n.. deprecated:: 1.0\n Not setting `min_samples` explicitly will raise an error in version\n 1.2 for models other than\n :class:`~sklearn.linear_model.LinearRegression`. To keep the old\n default behavior, set `min_samples=X.shape[1] + 1` explicitly." }, "type": { "kind": "UnionType", @@ -151258,7 +151258,7 @@ "docstring": { "type": "float in range [0, 1]", "default_value": "0.99", - "description": "RANSAC iteration stops if at least one outlier-free set of the training\ndata is sampled in RANSAC. This requires to generate at least N\nsamples (iterations)::\n\nN >= log(1 - probability) / log(1 - e**m)\n\nwhere the probability (confidence) is typically set to high value such\nas 0.99 (the default) and e is the current fraction of inliers w.r.t.\nthe total number of samples." + "description": "RANSAC iteration stops if at least one outlier-free set of the training\ndata is sampled in RANSAC. This requires to generate at least N\nsamples (iterations)::\n\n N >= log(1 - probability) / log(1 - e**m)\n\nwhere the probability (confidence) is typically set to high value such\nas 0.99 (the default) and e is the current fraction of inliers w.r.t.\nthe total number of samples." }, "type": { "kind": "UnionType", @@ -151284,7 +151284,7 @@ "docstring": { "type": "str, callable", "default_value": "'absolute_error'", - "description": "String inputs, 'absolute_error' and 'squared_error' are supported which\nfind the absolute error and squared error per sample respectively.\n\nIf ``loss`` is a callable, then it should be a function that takes\ntwo arrays as inputs, the true and predicted value and returns a 1-D\narray with the i-th value of the array corresponding to the loss\non ``X[i]``.\n\nIf the loss on a sample is greater than the ``residual_threshold``,\nthen this sample is classified as an outlier.\n\n.. versionadded:: 0.18\n\n.. deprecated:: 1.0\nThe loss 'squared_loss' was deprecated in v1.0 and will be removed\nin version 1.2. Use `loss='squared_error'` which is equivalent.\n\n.. deprecated:: 1.0\nThe loss 'absolute_loss' was deprecated in v1.0 and will be removed\nin version 1.2. Use `loss='absolute_error'` which is equivalent." + "description": "String inputs, 'absolute_error' and 'squared_error' are supported which\nfind the absolute error and squared error per sample respectively.\n\nIf ``loss`` is a callable, then it should be a function that takes\ntwo arrays as inputs, the true and predicted value and returns a 1-D\narray with the i-th value of the array corresponding to the loss\non ``X[i]``.\n\nIf the loss on a sample is greater than the ``residual_threshold``,\nthen this sample is classified as an outlier.\n\n.. versionadded:: 0.18\n\n.. deprecated:: 1.0\n The loss 'squared_loss' was deprecated in v1.0 and will be removed\n in version 1.2. Use `loss='squared_error'` which is equivalent.\n\n.. deprecated:: 1.0\n The loss 'absolute_loss' was deprecated in v1.0 and will be removed\n in version 1.2. 
Use `loss='absolute_error'` which is equivalent." }, "type": { "kind": "UnionType", @@ -151336,7 +151336,7 @@ "docstring": { "type": "object", "default_value": "\"deprecated\"", - "description": "Use `estimator` instead.\n\n.. deprecated:: 1.1\n`base_estimator` is deprecated and will be removed in 1.3.\nUse `estimator` instead." + "description": "Use `estimator` instead.\n\n.. deprecated:: 1.1\n `base_estimator` is deprecated and will be removed in 1.3.\n Use `estimator` instead." }, "type": { "kind": "NamedType", @@ -151723,7 +151723,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0 and\nwill be removed in 1.2." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and\n will be removed in 1.2." }, "type": { "kind": "NamedType", @@ -151791,11 +151791,11 @@ "docstring": { "type": "{'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs'}", "default_value": "'auto'", - "description": "Solver to use in the computational routines:\n\n- 'auto' chooses the solver automatically based on the type of data.\n\n- 'svd' uses a Singular Value Decomposition of X to compute the Ridge\ncoefficients. It is the most stable solver, in particular more stable\nfor singular matrices than 'cholesky' at the cost of being slower.\n\n- 'cholesky' uses the standard scipy.linalg.solve function to\nobtain a closed-form solution.\n\n- 'sparse_cg' uses the conjugate gradient solver as found in\nscipy.sparse.linalg.cg. As an iterative algorithm, this solver is\nmore appropriate than 'cholesky' for large-scale data\n(possibility to set `tol` and `max_iter`).\n\n- 'lsqr' uses the dedicated regularized least-squares routine\nscipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\nprocedure.\n\n- 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\nits improved, unbiased version named SAGA. Both methods also use an\niterative procedure, and are often faster than other solvers when\nboth n_samples and n_features are large. Note that 'sag' and\n'saga' fast convergence is only guaranteed on features with\napproximately the same scale. You can preprocess the data with a\nscaler from sklearn.preprocessing.\n\n- 'lbfgs' uses L-BFGS-B algorithm implemented in\n`scipy.optimize.minimize`. It can be used only when `positive`\nis True.\n\nAll solvers except 'svd' support both dense and sparse data. However, only\n'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when\n`fit_intercept` is True.\n\n.. versionadded:: 0.17\nStochastic Average Gradient descent solver.\n.. versionadded:: 0.19\nSAGA solver." 
+ "description": "Solver to use in the computational routines:\n\n- 'auto' chooses the solver automatically based on the type of data.\n\n- 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. It is the most stable solver, in particular more stable\n for singular matrices than 'cholesky' at the cost of being slower.\n\n- 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution.\n\n- 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n- 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n procedure.\n\n- 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its improved, unbiased version named SAGA. Both methods also use an\n iterative procedure, and are often faster than other solvers when\n both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n- 'lbfgs' uses L-BFGS-B algorithm implemented in\n `scipy.optimize.minimize`. It can be used only when `positive`\n is True.\n\nAll solvers except 'svd' support both dense and sparse data. However, only\n'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when\n`fit_intercept` is True.\n\n.. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n.. versionadded:: 0.19\n SAGA solver." }, "type": { "kind": "EnumType", - "values": ["sag", "lsqr", "svd", "saga", "cholesky", "lbfgs", "sparse_cg", "auto"] + "values": ["sparse_cg", "svd", "auto", "saga", "sag", "cholesky", "lbfgs", "lsqr"] } }, { @@ -151825,7 +151825,7 @@ "docstring": { "type": "int, RandomState instance", "default_value": "None", - "description": "Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\nSee :term:`Glossary ` for details.\n\n.. versionadded:: 0.17\n`random_state` to support Stochastic Average Gradient." + "description": "Used when ``solver`` == 'sag' or 'saga' to shuffle the data.\nSee :term:`Glossary ` for details.\n\n.. versionadded:: 0.17\n `random_state` to support Stochastic Average Gradient." }, "type": { "kind": "UnionType", @@ -152008,7 +152008,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0 and\nwill be removed in 1.2." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and\n will be removed in 1.2." 
}, "type": { "kind": "NamedType", @@ -152102,11 +152102,11 @@ "docstring": { "type": "{'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs'}", "default_value": "'auto'", - "description": "Solver to use in the computational routines:\n\n- 'auto' chooses the solver automatically based on the type of data.\n\n- 'svd' uses a Singular Value Decomposition of X to compute the Ridge\ncoefficients. It is the most stable solver, in particular more stable\nfor singular matrices than 'cholesky' at the cost of being slower.\n\n- 'cholesky' uses the standard scipy.linalg.solve function to\nobtain a closed-form solution.\n\n- 'sparse_cg' uses the conjugate gradient solver as found in\nscipy.sparse.linalg.cg. As an iterative algorithm, this solver is\nmore appropriate than 'cholesky' for large-scale data\n(possibility to set `tol` and `max_iter`).\n\n- 'lsqr' uses the dedicated regularized least-squares routine\nscipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\nprocedure.\n\n- 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\nits unbiased and more flexible version named SAGA. Both methods\nuse an iterative procedure, and are often faster than other solvers\nwhen both n_samples and n_features are large. Note that 'sag' and\n'saga' fast convergence is only guaranteed on features with\napproximately the same scale. You can preprocess the data with a\nscaler from sklearn.preprocessing.\n\n.. versionadded:: 0.17\nStochastic Average Gradient descent solver.\n.. versionadded:: 0.19\nSAGA solver.\n\n- 'lbfgs' uses L-BFGS-B algorithm implemented in\n`scipy.optimize.minimize`. It can be used only when `positive`\nis True." + "description": "Solver to use in the computational routines:\n\n- 'auto' chooses the solver automatically based on the type of data.\n\n- 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. It is the most stable solver, in particular more stable\n for singular matrices than 'cholesky' at the cost of being slower.\n\n- 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution.\n\n- 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n- 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\n procedure.\n\n- 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its unbiased and more flexible version named SAGA. Both methods\n use an iterative procedure, and are often faster than other solvers\n when both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n .. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n .. versionadded:: 0.19\n SAGA solver.\n\n- 'lbfgs' uses L-BFGS-B algorithm implemented in\n `scipy.optimize.minimize`. It can be used only when `positive`\n is True." 
}, "type": { "kind": "EnumType", - "values": ["sag", "lsqr", "svd", "saga", "cholesky", "lbfgs", "sparse_cg", "auto"] + "values": ["sparse_cg", "svd", "auto", "saga", "sag", "cholesky", "lbfgs", "lsqr"] } }, { @@ -152232,7 +152232,7 @@ "docstring": { "type": "float or ndarray of shape (n_samples,)", "default_value": "None", - "description": "Individual weights for each sample. If given a float, every sample\nwill have the same weight.\n\n.. versionadded:: 0.17\n*sample_weight* support to RidgeClassifier." + "description": "Individual weights for each sample. If given a float, every sample\nwill have the same weight.\n\n.. versionadded:: 0.17\n *sample_weight* support to RidgeClassifier." }, "type": { "kind": "UnionType", @@ -152319,7 +152319,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n``normalize`` was deprecated in version 1.0 and\nwill be removed in 1.2." + "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n ``normalize`` was deprecated in version 1.0 and\n will be removed in 1.2." }, "type": { "kind": "NamedType", @@ -155930,7 +155930,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Solve Ridge regression via LSQR.\n\nWe expect that y is always mean centered.\nIf X is dense, we expect it to be mean centered such that we can solve\n||y - Xw||_2^2 + alpha * ||w||_2^2\n\nIf X is sparse, we expect X_offset to be given such that we can solve\n||y - (X - X_offset)w||_2^2 + alpha * ||w||_2^2\n\nWith sample weights S=diag(sample_weight), this becomes\n||sqrt(S) (y - (X - X_offset) w)||_2^2 + alpha * ||w||_2^2\nand we expect y and X to already be rescaled, i.e. sqrt(S) @ y, sqrt(S) @ X. In\nthis case, X_offset is the sample_weight weighted mean of X before scaling by\nsqrt(S). The objective then reads\n||y - (X - sqrt(S) X_offset) w)||_2^2 + alpha * ||w||_2^2", + "description": "Solve Ridge regression via LSQR.\n\nWe expect that y is always mean centered.\nIf X is dense, we expect it to be mean centered such that we can solve\n ||y - Xw||_2^2 + alpha * ||w||_2^2\n\nIf X is sparse, we expect X_offset to be given such that we can solve\n ||y - (X - X_offset)w||_2^2 + alpha * ||w||_2^2\n\nWith sample weights S=diag(sample_weight), this becomes\n ||sqrt(S) (y - (X - X_offset) w)||_2^2 + alpha * ||w||_2^2\nand we expect y and X to already be rescaled, i.e. sqrt(S) @ y, sqrt(S) @ X. In\nthis case, X_offset is the sample_weight weighted mean of X before scaling by\nsqrt(S). 
The objective then reads\n ||y - (X - sqrt(S) X_offset) w)||_2^2 + alpha * ||w||_2^2", "docstring": "Solve Ridge regression via LSQR.\n\nWe expect that y is always mean centered.\nIf X is dense, we expect it to be mean centered such that we can solve\n ||y - Xw||_2^2 + alpha * ||w||_2^2\n\nIf X is sparse, we expect X_offset to be given such that we can solve\n ||y - (X - X_offset)w||_2^2 + alpha * ||w||_2^2\n\nWith sample weights S=diag(sample_weight), this becomes\n ||sqrt(S) (y - (X - X_offset) w)||_2^2 + alpha * ||w||_2^2\nand we expect y and X to already be rescaled, i.e. sqrt(S) @ y, sqrt(S) @ X. In\nthis case, X_offset is the sample_weight weighted mean of X before scaling by\nsqrt(S). The objective then reads\n ||y - (X - sqrt(S) X_offset) w)||_2^2 + alpha * ||w||_2^2" }, { @@ -156238,11 +156238,11 @@ "docstring": { "type": "{'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs'}", "default_value": "'auto'", - "description": "Solver to use in the computational routines:\n\n- 'auto' chooses the solver automatically based on the type of data.\n\n- 'svd' uses a Singular Value Decomposition of X to compute the Ridge\ncoefficients. It is the most stable solver, in particular more stable\nfor singular matrices than 'cholesky' at the cost of being slower.\n\n- 'cholesky' uses the standard scipy.linalg.solve function to\nobtain a closed-form solution via a Cholesky decomposition of\ndot(X.T, X)\n\n- 'sparse_cg' uses the conjugate gradient solver as found in\nscipy.sparse.linalg.cg. As an iterative algorithm, this solver is\nmore appropriate than 'cholesky' for large-scale data\n(possibility to set `tol` and `max_iter`).\n\n- 'lsqr' uses the dedicated regularized least-squares routine\nscipy.sparse.linalg.lsqr. It is the fastest and uses an iterative\nprocedure.\n\n- 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\nits improved, unbiased version named SAGA. Both methods also use an\niterative procedure, and are often faster than other solvers when\nboth n_samples and n_features are large. Note that 'sag' and\n'saga' fast convergence is only guaranteed on features with\napproximately the same scale. You can preprocess the data with a\nscaler from sklearn.preprocessing.\n\n- 'lbfgs' uses L-BFGS-B algorithm implemented in\n`scipy.optimize.minimize`. It can be used only when `positive`\nis True.\n\nAll solvers except 'svd' support both dense and sparse data. However, only\n'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when\n`fit_intercept` is True.\n\n.. versionadded:: 0.17\nStochastic Average Gradient descent solver.\n.. versionadded:: 0.19\nSAGA solver." + "description": "Solver to use in the computational routines:\n\n- 'auto' chooses the solver automatically based on the type of data.\n\n- 'svd' uses a Singular Value Decomposition of X to compute the Ridge\n coefficients. It is the most stable solver, in particular more stable\n for singular matrices than 'cholesky' at the cost of being slower.\n\n- 'cholesky' uses the standard scipy.linalg.solve function to\n obtain a closed-form solution via a Cholesky decomposition of\n dot(X.T, X)\n\n- 'sparse_cg' uses the conjugate gradient solver as found in\n scipy.sparse.linalg.cg. As an iterative algorithm, this solver is\n more appropriate than 'cholesky' for large-scale data\n (possibility to set `tol` and `max_iter`).\n\n- 'lsqr' uses the dedicated regularized least-squares routine\n scipy.sparse.linalg.lsqr. 
It is the fastest and uses an iterative\n procedure.\n\n- 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses\n its improved, unbiased version named SAGA. Both methods also use an\n iterative procedure, and are often faster than other solvers when\n both n_samples and n_features are large. Note that 'sag' and\n 'saga' fast convergence is only guaranteed on features with\n approximately the same scale. You can preprocess the data with a\n scaler from sklearn.preprocessing.\n\n- 'lbfgs' uses L-BFGS-B algorithm implemented in\n `scipy.optimize.minimize`. It can be used only when `positive`\n is True.\n\nAll solvers except 'svd' support both dense and sparse data. However, only\n'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when\n`fit_intercept` is True.\n\n.. versionadded:: 0.17\n Stochastic Average Gradient descent solver.\n.. versionadded:: 0.19\n SAGA solver." }, "type": { "kind": "EnumType", - "values": ["sag", "lsqr", "svd", "saga", "cholesky", "lbfgs", "sparse_cg", "auto"] + "values": ["sparse_cg", "svd", "auto", "saga", "sag", "cholesky", "lbfgs", "lsqr"] } }, { @@ -156451,7 +156451,7 @@ }, "type": { "kind": "EnumType", - "values": ["log", "multinomial", "squared"] + "values": ["log", "squared", "multinomial"] } }, { @@ -156588,11 +156588,11 @@ "docstring": { "type": "{'log', 'squared', 'multinomial'}", "default_value": "'log'", - "description": "Loss function that will be optimized:\n-'log' is the binary logistic loss, as used in LogisticRegression.\n-'squared' is the squared loss, as used in Ridge.\n-'multinomial' is the multinomial logistic loss, as used in\nLogisticRegression.\n\n.. versionadded:: 0.18\n*loss='multinomial'*" + "description": "Loss function that will be optimized:\n-'log' is the binary logistic loss, as used in LogisticRegression.\n-'squared' is the squared loss, as used in Ridge.\n-'multinomial' is the multinomial logistic loss, as used in\n LogisticRegression.\n\n.. versionadded:: 0.18\n *loss='multinomial'*" }, "type": { "kind": "EnumType", - "values": ["log", "multinomial", "squared"] + "values": ["log", "squared", "multinomial"] } }, { @@ -156754,7 +156754,7 @@ "docstring": { "type": "dict", "default_value": "None", - "description": "The initialization parameters used for warm starting. Warm starting is\ncurrently used in LogisticRegression but not in Ridge.\nIt contains:\n- 'coef': the weight vector, with the intercept in last line\nif the intercept is fitted.\n- 'gradient_memory': the scalar gradient for all seen samples.\n- 'sum_gradient': the sum of gradient over all seen samples,\nfor each feature.\n- 'intercept_sum_gradient': the sum of gradient over all seen\nsamples, for the intercept.\n- 'seen': array of boolean describing the seen samples.\n- 'num_seen': the number of seen samples." + "description": "The initialization parameters used for warm starting. Warm starting is\ncurrently used in LogisticRegression but not in Ridge.\nIt contains:\n - 'coef': the weight vector, with the intercept in last line\n if the intercept is fitted.\n - 'gradient_memory': the scalar gradient for all seen samples.\n - 'sum_gradient': the sum of gradient over all seen samples,\n for each feature.\n - 'intercept_sum_gradient': the sum of gradient over all seen\n samples, for the intercept.\n - 'seen': array of boolean describing the seen samples.\n - 'num_seen': the number of seen samples." 
}, "type": { "kind": "NamedType", @@ -159811,21 +159811,21 @@ "docstring": { "type": "{'hinge', 'log_loss', 'log', 'modified_huber', 'squared_hinge', 'perceptron', 'squared_error', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'}", "default_value": "'hinge'", - "description": "The loss function to be used.\n\n- 'hinge' gives a linear SVM.\n- 'log_loss' gives logistic regression, a probabilistic classifier.\n- 'modified_huber' is another smooth loss that brings tolerance to\noutliers as well as probability estimates.\n- 'squared_hinge' is like hinge but is quadratically penalized.\n- 'perceptron' is the linear loss used by the perceptron algorithm.\n- The other losses, 'squared_error', 'huber', 'epsilon_insensitive' and\n'squared_epsilon_insensitive' are designed for regression but can be useful\nin classification as well; see\n:class:`~sklearn.linear_model.SGDRegressor` for a description.\n\nMore details about the losses formulas can be found in the\n:ref:`User Guide `.\n\n.. deprecated:: 1.0\nThe loss 'squared_loss' was deprecated in v1.0 and will be removed\nin version 1.2. Use `loss='squared_error'` which is equivalent.\n\n.. deprecated:: 1.1\nThe loss 'log' was deprecated in v1.1 and will be removed\nin version 1.3. Use `loss='log_loss'` which is equivalent." + "description": "The loss function to be used.\n\n- 'hinge' gives a linear SVM.\n- 'log_loss' gives logistic regression, a probabilistic classifier.\n- 'modified_huber' is another smooth loss that brings tolerance to\n outliers as well as probability estimates.\n- 'squared_hinge' is like hinge but is quadratically penalized.\n- 'perceptron' is the linear loss used by the perceptron algorithm.\n- The other losses, 'squared_error', 'huber', 'epsilon_insensitive' and\n 'squared_epsilon_insensitive' are designed for regression but can be useful\n in classification as well; see\n :class:`~sklearn.linear_model.SGDRegressor` for a description.\n\nMore details about the losses formulas can be found in the\n:ref:`User Guide `.\n\n.. deprecated:: 1.0\n The loss 'squared_loss' was deprecated in v1.0 and will be removed\n in version 1.2. Use `loss='squared_error'` which is equivalent.\n\n.. deprecated:: 1.1\n The loss 'log' was deprecated in v1.1 and will be removed\n in version 1.3. Use `loss='log_loss'` which is equivalent." }, "type": { "kind": "EnumType", "values": [ + "log_loss", "modified_huber", "squared_error", - "squared_epsilon_insensitive", - "squared_hinge", - "epsilon_insensitive", - "hinge", - "log_loss", "log", + "epsilon_insensitive", + "squared_hinge", + "squared_epsilon_insensitive", + "perceptron", "huber", - "perceptron" + "hinge" ] } }, @@ -159843,7 +159843,7 @@ }, "type": { "kind": "EnumType", - "values": ["l2", "elasticnet", "l1"] + "values": ["l1", "elasticnet", "l2"] } }, { @@ -160048,7 +160048,7 @@ "docstring": { "type": "str", "default_value": "'optimal'", - "description": "The learning rate schedule:\n\n- 'constant': `eta = eta0`\n- 'optimal': `eta = 1.0 / (alpha * (t + t0))`\nwhere `t0` is chosen by a heuristic proposed by Leon Bottou.\n- 'invscaling': `eta = eta0 / pow(t, power_t)`\n- 'adaptive': `eta = eta0`, as long as the training keeps decreasing.\nEach time n_iter_no_change consecutive epochs fail to decrease the\ntraining loss by tol or fail to increase validation score by tol if\n`early_stopping` is `True`, the current learning rate is divided by 5.\n\n.. 
versionadded:: 0.20\nAdded 'adaptive' option" + "description": "The learning rate schedule:\n\n- 'constant': `eta = eta0`\n- 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n where `t0` is chosen by a heuristic proposed by Leon Bottou.\n- 'invscaling': `eta = eta0 / pow(t, power_t)`\n- 'adaptive': `eta = eta0`, as long as the training keeps decreasing.\n Each time n_iter_no_change consecutive epochs fail to decrease the\n training loss by tol or fail to increase validation score by tol if\n `early_stopping` is `True`, the current learning rate is divided by 5.\n\n .. versionadded:: 0.20\n Added 'adaptive' option" }, "type": { "kind": "NamedType", @@ -160099,7 +160099,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "Whether to use early stopping to terminate training when validation\nscore is not improving. If set to `True`, it will automatically set aside\na stratified fraction of training data as validation and terminate\ntraining when validation score returned by the `score` method is not\nimproving by at least tol for n_iter_no_change consecutive epochs.\n\n.. versionadded:: 0.20\nAdded 'early_stopping' option" + "description": "Whether to use early stopping to terminate training when validation\nscore is not improving. If set to `True`, it will automatically set aside\na stratified fraction of training data as validation and terminate\ntraining when validation score returned by the `score` method is not\nimproving by at least tol for n_iter_no_change consecutive epochs.\n\n.. versionadded:: 0.20\n Added 'early_stopping' option" }, "type": { "kind": "NamedType", @@ -160116,7 +160116,7 @@ "docstring": { "type": "float", "default_value": "0.1", - "description": "The proportion of training data to set aside as validation set for\nearly stopping. Must be between 0 and 1.\nOnly used if `early_stopping` is True.\nValues must be in the range `(0.0, 1.0)`.\n\n.. versionadded:: 0.20\nAdded 'validation_fraction' option" + "description": "The proportion of training data to set aside as validation set for\nearly stopping. Must be between 0 and 1.\nOnly used if `early_stopping` is True.\nValues must be in the range `(0.0, 1.0)`.\n\n.. versionadded:: 0.20\n Added 'validation_fraction' option" }, "type": { "kind": "UnionType", @@ -160146,7 +160146,7 @@ "docstring": { "type": "int", "default_value": "5", - "description": "Number of iterations with no improvement to wait before stopping\nfitting.\nConvergence is checked against the training loss or the\nvalidation loss depending on the `early_stopping` parameter.\nInteger values must be in the range `[1, max_iter)`.\n\n.. versionadded:: 0.20\nAdded 'n_iter_no_change' option" + "description": "Number of iterations with no improvement to wait before stopping\nfitting.\nConvergence is checked against the training loss or the\nvalidation loss depending on the `early_stopping` parameter.\nInteger values must be in the range `[1, max_iter)`.\n\n.. versionadded:: 0.20\n Added 'n_iter_no_change' option" }, "type": { "kind": "NamedType", @@ -160577,11 +160577,11 @@ "docstring": { "type": "{'constant', 'optimal', 'invscaling', 'adaptive'}", "default_value": "'optimal'", - "description": "The learning rate schedule to use with `fit`. 
(If using `partial_fit`,\nlearning rate must be controlled directly).\n\n- 'constant': `eta = eta0`\n- 'optimal': `eta = 1.0 / (alpha * (t + t0))`\nwhere t0 is chosen by a heuristic proposed by Leon Bottou.\n- 'invscaling': `eta = eta0 / pow(t, power_t)`\n- 'adaptive': eta = eta0, as long as the training keeps decreasing.\nEach time n_iter_no_change consecutive epochs fail to decrease the\ntraining loss by tol or fail to increase validation score by tol if\nearly_stopping is True, the current learning rate is divided by 5." + "description": "The learning rate schedule to use with `fit`. (If using `partial_fit`,\nlearning rate must be controlled directly).\n\n- 'constant': `eta = eta0`\n- 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n where t0 is chosen by a heuristic proposed by Leon Bottou.\n- 'invscaling': `eta = eta0 / pow(t, power_t)`\n- 'adaptive': eta = eta0, as long as the training keeps decreasing.\n Each time n_iter_no_change consecutive epochs fail to decrease the\n training loss by tol or fail to increase validation score by tol if\n early_stopping is True, the current learning rate is divided by 5." }, "type": { "kind": "EnumType", - "values": ["adaptive", "constant", "invscaling", "optimal"] + "values": ["constant", "invscaling", "adaptive", "optimal"] } }, { @@ -161572,7 +161572,7 @@ "docstring": { "type": "str", "default_value": "'squared_error'", - "description": "The loss function to be used. The possible values are 'squared_error',\n'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'\n\nThe 'squared_error' refers to the ordinary least squares fit.\n'huber' modifies 'squared_error' to focus less on getting outliers\ncorrect by switching from squared to linear loss past a distance of\nepsilon. 'epsilon_insensitive' ignores errors less than epsilon and is\nlinear past that; this is the loss function used in SVR.\n'squared_epsilon_insensitive' is the same but becomes squared loss past\na tolerance of epsilon.\n\nMore details about the losses formulas can be found in the\n:ref:`User Guide `.\n\n.. deprecated:: 1.0\nThe loss 'squared_loss' was deprecated in v1.0 and will be removed\nin version 1.2. Use `loss='squared_error'` which is equivalent." + "description": "The loss function to be used. The possible values are 'squared_error',\n'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'\n\nThe 'squared_error' refers to the ordinary least squares fit.\n'huber' modifies 'squared_error' to focus less on getting outliers\ncorrect by switching from squared to linear loss past a distance of\nepsilon. 'epsilon_insensitive' ignores errors less than epsilon and is\nlinear past that; this is the loss function used in SVR.\n'squared_epsilon_insensitive' is the same but becomes squared loss past\na tolerance of epsilon.\n\nMore details about the losses formulas can be found in the\n:ref:`User Guide `.\n\n.. deprecated:: 1.0\n The loss 'squared_loss' was deprecated in v1.0 and will be removed\n in version 1.2. Use `loss='squared_error'` which is equivalent." 
}, "type": { "kind": "NamedType", @@ -161593,7 +161593,7 @@ }, "type": { "kind": "EnumType", - "values": ["l2", "elasticnet", "l1"] + "values": ["l1", "elasticnet", "l2"] } }, { @@ -161768,7 +161768,7 @@ "docstring": { "type": "str", "default_value": "'invscaling'", - "description": "The learning rate schedule:\n\n- 'constant': `eta = eta0`\n- 'optimal': `eta = 1.0 / (alpha * (t + t0))`\nwhere t0 is chosen by a heuristic proposed by Leon Bottou.\n- 'invscaling': `eta = eta0 / pow(t, power_t)`\n- 'adaptive': eta = eta0, as long as the training keeps decreasing.\nEach time n_iter_no_change consecutive epochs fail to decrease the\ntraining loss by tol or fail to increase validation score by tol if\nearly_stopping is True, the current learning rate is divided by 5.\n\n.. versionadded:: 0.20\nAdded 'adaptive' option" + "description": "The learning rate schedule:\n\n- 'constant': `eta = eta0`\n- 'optimal': `eta = 1.0 / (alpha * (t + t0))`\n where t0 is chosen by a heuristic proposed by Leon Bottou.\n- 'invscaling': `eta = eta0 / pow(t, power_t)`\n- 'adaptive': eta = eta0, as long as the training keeps decreasing.\n Each time n_iter_no_change consecutive epochs fail to decrease the\n training loss by tol or fail to increase validation score by tol if\n early_stopping is True, the current learning rate is divided by 5.\n\n .. versionadded:: 0.20\n Added 'adaptive' option" }, "type": { "kind": "NamedType", @@ -161819,7 +161819,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "Whether to use early stopping to terminate training when validation\nscore is not improving. If set to True, it will automatically set aside\na fraction of training data as validation and terminate\ntraining when validation score returned by the `score` method is not\nimproving by at least `tol` for `n_iter_no_change` consecutive\nepochs.\n\n.. versionadded:: 0.20\nAdded 'early_stopping' option" + "description": "Whether to use early stopping to terminate training when validation\nscore is not improving. If set to True, it will automatically set aside\na fraction of training data as validation and terminate\ntraining when validation score returned by the `score` method is not\nimproving by at least `tol` for `n_iter_no_change` consecutive\nepochs.\n\n.. versionadded:: 0.20\n Added 'early_stopping' option" }, "type": { "kind": "NamedType", @@ -161836,7 +161836,7 @@ "docstring": { "type": "float", "default_value": "0.1", - "description": "The proportion of training data to set aside as validation set for\nearly stopping. Must be between 0 and 1.\nOnly used if `early_stopping` is True.\n\n.. versionadded:: 0.20\nAdded 'validation_fraction' option" + "description": "The proportion of training data to set aside as validation set for\nearly stopping. Must be between 0 and 1.\nOnly used if `early_stopping` is True.\n\n.. versionadded:: 0.20\n Added 'validation_fraction' option" }, "type": { "kind": "NamedType", @@ -161853,7 +161853,7 @@ "docstring": { "type": "int", "default_value": "5", - "description": "Number of iterations with no improvement to wait before stopping\nfitting.\nConvergence is checked against the training loss or the\nvalidation loss depending on the `early_stopping` parameter.\n\n.. versionadded:: 0.20\nAdded 'n_iter_no_change' option" + "description": "Number of iterations with no improvement to wait before stopping\nfitting.\nConvergence is checked against the training loss or the\nvalidation loss depending on the `early_stopping` parameter.\n\n.. 
versionadded:: 0.20\n Added 'n_iter_no_change' option" }, "type": { "kind": "NamedType", @@ -163119,7 +163119,7 @@ }, "type": { "kind": "EnumType", - "values": ["auto", "dense", "arpack"] + "values": ["dense", "arpack", "auto"] } }, { @@ -163170,7 +163170,7 @@ }, "type": { "kind": "EnumType", - "values": ["D", "auto", "FW"] + "values": ["FW", "D", "auto"] } }, { @@ -163187,7 +163187,7 @@ }, "type": { "kind": "EnumType", - "values": ["ball_tree", "brute", "auto", "kd_tree"] + "values": ["brute", "kd_tree", "ball_tree", "auto"] } }, { @@ -163607,11 +163607,11 @@ "docstring": { "type": "{'auto', 'arpack', 'dense'}", "default_value": "'auto'", - "description": "The solver used to compute the eigenvectors. The available options are:\n\n- `'auto'` : algorithm will attempt to choose the best method for input\ndata.\n- `'arpack'` : use arnoldi iteration in shift-invert mode. For this\nmethod, M may be a dense matrix, sparse matrix, or general linear\noperator.\n- `'dense'` : use standard dense matrix operations for the eigenvalue\ndecomposition. For this method, M must be an array or matrix type.\nThis method should be avoided for large problems.\n\n.. warning::\nARPACK can be unstable for some problems. It is best to try several\nrandom seeds in order to check results." + "description": "The solver used to compute the eigenvectors. The available options are:\n\n- `'auto'` : algorithm will attempt to choose the best method for input\n data.\n- `'arpack'` : use arnoldi iteration in shift-invert mode. For this\n method, M may be a dense matrix, sparse matrix, or general linear\n operator.\n- `'dense'` : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array or matrix type.\n This method should be avoided for large problems.\n\n.. warning::\n ARPACK can be unstable for some problems. It is best to try several\n random seeds in order to check results." }, "type": { "kind": "EnumType", - "values": ["auto", "dense", "arpack"] + "values": ["dense", "arpack", "auto"] } }, { @@ -163658,11 +163658,11 @@ "docstring": { "type": "{'standard', 'hessian', 'modified', 'ltsa'}", "default_value": "'standard'", - "description": "- `standard`: use the standard locally linear embedding algorithm. see\nreference [1]_\n- `hessian`: use the Hessian eigenmap method. This method requires\n``n_neighbors > n_components * (1 + (n_components + 1) / 2``. see\nreference [2]_\n- `modified`: use the modified locally linear embedding algorithm.\nsee reference [3]_\n- `ltsa`: use local tangent space alignment algorithm. see\nreference [4]_" + "description": "- `standard`: use the standard locally linear embedding algorithm. see\n reference [1]_\n- `hessian`: use the Hessian eigenmap method. This method requires\n ``n_neighbors > n_components * (1 + (n_components + 1) / 2``. see\n reference [2]_\n- `modified`: use the modified locally linear embedding algorithm.\n see reference [3]_\n- `ltsa`: use local tangent space alignment algorithm. 
see\n reference [4]_" }, "type": { "kind": "EnumType", - "values": ["ltsa", "modified", "standard", "hessian"] + "values": ["ltsa", "hessian", "standard", "modified"] } }, { @@ -163713,7 +163713,7 @@ }, "type": { "kind": "EnumType", - "values": ["ball_tree", "brute", "auto", "kd_tree"] + "values": ["brute", "kd_tree", "ball_tree", "auto"] } }, { @@ -164264,11 +164264,11 @@ "docstring": { "type": "{'auto', 'arpack', 'dense'}", "default_value": "'auto'", - "description": "auto : algorithm will attempt to choose the best method for input data\n\narpack : use arnoldi iteration in shift-invert mode.\nFor this method, M may be a dense matrix, sparse matrix,\nor general linear operator.\nWarning: ARPACK can be unstable for some problems. It is\nbest to try several random seeds in order to check results.\n\ndense : use standard dense matrix operations for the eigenvalue\ndecomposition. For this method, M must be an array\nor matrix type. This method should be avoided for\nlarge problems." + "description": "auto : algorithm will attempt to choose the best method for input data\n\narpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\n\ndense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems." }, "type": { "kind": "EnumType", - "values": ["auto", "dense", "arpack"] + "values": ["dense", "arpack", "auto"] } }, { @@ -164315,11 +164315,11 @@ "docstring": { "type": "{'standard', 'hessian', 'modified', 'ltsa'}", "default_value": "'standard'", - "description": "standard : use the standard locally linear embedding algorithm.\nsee reference [1]_\nhessian : use the Hessian eigenmap method. This method requires\nn_neighbors > n_components * (1 + (n_components + 1) / 2.\nsee reference [2]_\nmodified : use the modified locally linear embedding algorithm.\nsee reference [3]_\nltsa : use local tangent space alignment algorithm\nsee reference [4]_" + "description": "standard : use the standard locally linear embedding algorithm.\n see reference [1]_\nhessian : use the Hessian eigenmap method. This method requires\n n_neighbors > n_components * (1 + (n_components + 1) / 2.\n see reference [2]_\nmodified : use the modified locally linear embedding algorithm.\n see reference [3]_\nltsa : use local tangent space alignment algorithm\n see reference [4]_" }, "type": { "kind": "EnumType", - "values": ["ltsa", "modified", "standard", "hessian"] + "values": ["ltsa", "hessian", "standard", "modified"] } }, { @@ -164482,11 +164482,11 @@ "docstring": { "type": "{'auto', 'arpack', 'dense'}", "default_value": "'arpack'", - "description": "auto : algorithm will attempt to choose the best method for input data\narpack : use arnoldi iteration in shift-invert mode.\nFor this method, M may be a dense matrix, sparse matrix,\nor general linear operator.\nWarning: ARPACK can be unstable for some problems. It is\nbest to try several random seeds in order to check results.\ndense : use standard dense matrix operations for the eigenvalue\ndecomposition. For this method, M must be an array\nor matrix type. This method should be avoided for\nlarge problems." 
+ "description": "auto : algorithm will attempt to choose the best method for input data\narpack : use arnoldi iteration in shift-invert mode.\n For this method, M may be a dense matrix, sparse matrix,\n or general linear operator.\n Warning: ARPACK can be unstable for some problems. It is\n best to try several random seeds in order to check results.\ndense : use standard dense matrix operations for the eigenvalue\n decomposition. For this method, M must be an array\n or matrix type. This method should be avoided for\n large problems." }, "type": { "kind": "EnumType", - "values": ["auto", "dense", "arpack"] + "values": ["dense", "arpack", "auto"] } }, { @@ -164735,7 +164735,7 @@ "docstring": { "type": "{'euclidean', 'precomputed'}", "default_value": "'euclidean'", - "description": "Dissimilarity measure to use:\n\n- 'euclidean':\nPairwise Euclidean distances between points in the dataset.\n\n- 'precomputed':\nPre-computed dissimilarities are passed directly to ``fit`` and\n``fit_transform``." + "description": "Dissimilarity measure to use:\n\n- 'euclidean':\n Pairwise Euclidean distances between points in the dataset.\n\n- 'precomputed':\n Pre-computed dissimilarities are passed directly to ``fit`` and\n ``fit_transform``." }, "type": { "kind": "EnumType", @@ -165354,7 +165354,7 @@ "docstring": { "type": "{'nearest_neighbors', 'rbf', 'precomputed', 'precomputed_nearest_neighbors'} or callable", "default_value": "'nearest_neighbors'", - "description": "How to construct the affinity matrix.\n- 'nearest_neighbors' : construct the affinity matrix by computing a\ngraph of nearest neighbors.\n- 'rbf' : construct the affinity matrix by computing a radial basis\nfunction (RBF) kernel.\n- 'precomputed' : interpret ``X`` as a precomputed affinity matrix.\n- 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph\nof precomputed nearest neighbors, and constructs the affinity matrix\nby selecting the ``n_neighbors`` nearest neighbors.\n- callable : use passed in function as affinity\nthe function takes in data matrix (n_samples, n_features)\nand return affinity matrix (n_samples, n_samples)." + "description": "How to construct the affinity matrix.\n - 'nearest_neighbors' : construct the affinity matrix by computing a\n graph of nearest neighbors.\n - 'rbf' : construct the affinity matrix by computing a radial basis\n function (RBF) kernel.\n - 'precomputed' : interpret ``X`` as a precomputed affinity matrix.\n - 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph\n of precomputed nearest neighbors, and constructs the affinity matrix\n by selecting the ``n_neighbors`` nearest neighbors.\n - callable : use passed in function as affinity\n the function takes in data matrix (n_samples, n_features)\n and return affinity matrix (n_samples, n_samples)." }, "type": { "kind": "UnionType", @@ -165397,7 +165397,7 @@ "docstring": { "type": "int, RandomState instance or None", "default_value": "None", - "description": "A pseudo random number generator used for the initialization\nof the lobpcg eigen vectors decomposition when `eigen_solver ==\n'amg'`, and for the K-Means initialization. Use an int to make\nthe results deterministic across calls (See\n:term:`Glossary `).\n\n.. note::\nWhen using `eigen_solver == 'amg'`,\nit is necessary to also fix the global numpy seed with\n`np.random.seed(int)` to get deterministic results. See\nhttps://github.com/pyamg/pyamg/issues/139 for further\ninformation." 
+ "description": "A pseudo random number generator used for the initialization\nof the lobpcg eigen vectors decomposition when `eigen_solver ==\n'amg'`, and for the K-Means initialization. Use an int to make\nthe results deterministic across calls (See\n:term:`Glossary `).\n\n.. note::\n When using `eigen_solver == 'amg'`,\n it is necessary to also fix the global numpy seed with\n `np.random.seed(int)` to get deterministic results. See\n https://github.com/pyamg/pyamg/issues/139 for further\n information." }, "type": { "kind": "UnionType", @@ -165431,7 +165431,7 @@ }, "type": { "kind": "EnumType", - "values": ["amg", "lobpcg", "arpack"] + "values": ["lobpcg", "amg", "arpack"] } }, { @@ -165527,7 +165527,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Calculate the affinity matrix from data\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\nTraining vector, where `n_samples` is the number of samples\nand `n_features` is the number of features.\n\nIf affinity is \"precomputed\"\nX : array-like of shape (n_samples, n_samples),\nInterpret X as precomputed adjacency graph computed from\nsamples.\n\nY: Ignored", + "description": "Calculate the affinity matrix from data\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where `n_samples` is the number of samples\n and `n_features` is the number of features.\n\n If affinity is \"precomputed\"\n X : array-like of shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\nY: Ignored", "docstring": "Calculate the affinity matrix from data\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n Training vector, where `n_samples` is the number of samples\n and `n_features` is the number of features.\n\n If affinity is \"precomputed\"\n X : array-like of shape (n_samples, n_samples),\n Interpret X as precomputed adjacency graph computed from\n samples.\n\nY: Ignored\n\nReturns\n-------\naffinity_matrix of shape (n_samples, n_samples)" }, { @@ -165910,7 +165910,7 @@ }, "type": { "kind": "EnumType", - "values": ["amg", "lobpcg", "arpack"] + "values": ["lobpcg", "amg", "arpack"] } }, { @@ -165923,7 +165923,7 @@ "docstring": { "type": "int, RandomState instance or None", "default_value": "None", - "description": "A pseudo random number generator used for the initialization\nof the lobpcg eigen vectors decomposition when `eigen_solver ==\n'amg'`, and for the K-Means initialization. Use an int to make\nthe results deterministic across calls (See\n:term:`Glossary `).\n\n.. note::\nWhen using `eigen_solver == 'amg'`,\nit is necessary to also fix the global numpy seed with\n`np.random.seed(int)` to get deterministic results. See\nhttps://github.com/pyamg/pyamg/issues/139 for further\ninformation." + "description": "A pseudo random number generator used for the initialization\nof the lobpcg eigen vectors decomposition when `eigen_solver ==\n'amg'`, and for the K-Means initialization. Use an int to make\nthe results deterministic across calls (See\n:term:`Glossary `).\n\n.. note::\n When using `eigen_solver == 'amg'`,\n it is necessary to also fix the global numpy seed with\n `np.random.seed(int)` to get deterministic results. See\n https://github.com/pyamg/pyamg/issues/139 for further\n information." 
}, "type": { "kind": "UnionType", @@ -166133,7 +166133,7 @@ "docstring": { "type": "int", "default_value": "300", - "description": "Maximum number of iterations without progress before we abort the\noptimization, used after 250 initial iterations with early\nexaggeration. Note that progress is only checked every 50 iterations so\nthis value is rounded to the next multiple of 50.\n\n.. versionadded:: 0.17\nparameter *n_iter_without_progress* to control stopping criteria." + "description": "Maximum number of iterations without progress before we abort the\noptimization, used after 250 initial iterations with early\nexaggeration. Note that progress is only checked every 50 iterations so\nthis value is rounded to the next multiple of 50.\n\n.. versionadded:: 0.17\n parameter *n_iter_without_progress* to control stopping criteria." }, "type": { "kind": "NamedType", @@ -166283,7 +166283,7 @@ "docstring": { "type": "str", "default_value": "'barnes_hut'", - "description": "By default the gradient calculation algorithm uses Barnes-Hut\napproximation running in O(NlogN) time. method='exact'\nwill run on the slower, but exact, algorithm in O(N^2) time. The\nexact algorithm should be used when nearest-neighbor errors need\nto be better than 3%. However, the exact method cannot scale to\nmillions of examples.\n\n.. versionadded:: 0.17\nApproximate optimization *method* via the Barnes-Hut." + "description": "By default the gradient calculation algorithm uses Barnes-Hut\napproximation running in O(NlogN) time. method='exact'\nwill run on the slower, but exact, algorithm in O(N^2) time. The\nexact algorithm should be used when nearest-neighbor errors need\nto be better than 3%. However, the exact method cannot scale to\nmillions of examples.\n\n.. versionadded:: 0.17\n Approximate optimization *method* via the Barnes-Hut." }, "type": { "kind": "NamedType", @@ -166334,7 +166334,7 @@ "docstring": { "type": "True", "default_value": "'deprecated'", - "description": "This parameter has no effect since distance values are always squared\nsince 1.1.\n\n.. deprecated:: 1.1\n`square_distances` has no effect from 1.1 and will be removed in\n1.3." + "description": "This parameter has no effect since distance values are always squared\nsince 1.1.\n\n.. deprecated:: 1.1\n `square_distances` has no effect from 1.1 and will be removed in\n 1.3." }, "type": { "kind": "NamedType", @@ -167434,7 +167434,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.manifold"], - "description": "Expresses to what extent the local structure is retained.\n\nThe trustworthiness is within [0, 1]. It is defined as\n\n.. math::\n\nT(k) = 1 - \\frac{2}{nk (2n - 3k - 1)} \\sum^n_{i=1}\n\\sum_{j \\in \\mathcal{N}_{i}^{k}} \\max(0, (r(i, j) - k))\n\nwhere for each sample i, :math:`\\mathcal{N}_{i}^{k}` are its k nearest\nneighbors in the output space, and every sample j is its :math:`r(i, j)`-th\nnearest neighbor in the input space. In other words, any unexpected nearest\nneighbors in the output space are penalised in proportion to their rank in\nthe input space.", + "description": "Expresses to what extent the local structure is retained.\n\nThe trustworthiness is within [0, 1]. It is defined as\n\n.. math::\n\n T(k) = 1 - \\frac{2}{nk (2n - 3k - 1)} \\sum^n_{i=1}\n \\sum_{j \\in \\mathcal{N}_{i}^{k}} \\max(0, (r(i, j) - k))\n\nwhere for each sample i, :math:`\\mathcal{N}_{i}^{k}` are its k nearest\nneighbors in the output space, and every sample j is its :math:`r(i, j)`-th\nnearest neighbor in the input space. 
In other words, any unexpected nearest\nneighbors in the output space are penalised in proportion to their rank in\nthe input space.", "docstring": "Expresses to what extent the local structure is retained.\n\nThe trustworthiness is within [0, 1]. It is defined as\n\n.. math::\n\n T(k) = 1 - \\frac{2}{nk (2n - 3k - 1)} \\sum^n_{i=1}\n \\sum_{j \\in \\mathcal{N}_{i}^{k}} \\max(0, (r(i, j) - k))\n\nwhere for each sample i, :math:`\\mathcal{N}_{i}^{k}` are its k nearest\nneighbors in the output space, and every sample j is its :math:`r(i, j)`-th\nnearest neighbor in the input space. In other words, any unexpected nearest\nneighbors in the output space are penalised in proportion to their rank in\nthe input space.\n\nParameters\n----------\nX : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)\n If the metric is 'precomputed' X must be a square distance\n matrix. Otherwise it contains a sample per row.\n\nX_embedded : ndarray of shape (n_samples, n_components)\n Embedding of the training data in low-dimensional space.\n\nn_neighbors : int, default=5\n The number of neighbors that will be considered. Should be fewer than\n `n_samples / 2` to ensure the trustworthiness to lies within [0, 1], as\n mentioned in [1]_. An error will be raised otherwise.\n\nmetric : str or callable, default='euclidean'\n Which metric to use for computing pairwise distances between samples\n from the original input space. If metric is 'precomputed', X must be a\n matrix of pairwise distances or squared distances. Otherwise, for a list\n of available metrics, see the documentation of argument metric in\n `sklearn.pairwise.pairwise_distances` and metrics listed in\n `sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`. Note that the\n \"cosine\" metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`.\n\n .. versionadded:: 0.20\n\nReturns\n-------\ntrustworthiness : float\n Trustworthiness of the low-dimensional embedding.\n\nReferences\n----------\n.. [1] Jarkko Venna and Samuel Kaski. 2001. Neighborhood\n Preservation in Nonlinear Projection Methods: An Experimental Study.\n In Proceedings of the International Conference on Artificial Neural Networks\n (ICANN '01). Springer-Verlag, Berlin, Heidelberg, 485-491.\n\n.. [2] Laurens van der Maaten. Learning a Parametric Embedding by Preserving\n Local Structure. Proceedings of the Twelth International Conference on\n Artificial Intelligence and Statistics, PMLR 5:384-391, 2009." }, { @@ -167588,11 +167588,11 @@ "docstring": { "type": "{None, 'micro', 'macro', 'samples', 'weighted'}", "default_value": "'macro'", - "description": "If ``None``, the scores for each class are returned. Otherwise,\nthis determines the type of averaging performed on the data:\n\n``'micro'``:\nCalculate metrics globally by considering each element of the label\nindicator matrix as a label.\n``'macro'``:\nCalculate metrics for each label, and find their unweighted\nmean. This does not take label imbalance into account.\n``'weighted'``:\nCalculate metrics for each label, and find their average, weighted\nby support (the number of true instances for each label).\n``'samples'``:\nCalculate metrics for each instance, and find their average.\n\nWill be ignored when ``y_true`` is binary." + "description": "If ``None``, the scores for each class are returned. 
Otherwise,\nthis determines the type of averaging performed on the data:\n\n``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n``'samples'``:\n Calculate metrics for each instance, and find their average.\n\nWill be ignored when ``y_true`` is binary." }, "type": { "kind": "EnumType", - "values": ["weighted", "samples", "micro", "macro"] + "values": ["samples", "macro", "micro", "weighted"] } }, { @@ -167635,7 +167635,7 @@ "docstring": { "type": "callable", "default_value": "", - "description": "The binary metric function to use that accepts the following as input:\ny_true_target : array, shape = [n_samples_target]\nSome sub-array of y_true for a pair of classes designated\npositive and negative in the one-vs-one scheme.\ny_score_target : array, shape = [n_samples_target]\nScores corresponding to the probability estimates\nof a sample belonging to the designated positive class label" + "description": "The binary metric function to use that accepts the following as input:\n y_true_target : array, shape = [n_samples_target]\n Some sub-array of y_true for a pair of classes designated\n positive and negative in the one-vs-one scheme.\n y_score_target : array, shape = [n_samples_target]\n Scores corresponding to the probability estimates\n of a sample belonging to the designated positive class label" }, "type": { "kind": "NamedType", @@ -167686,11 +167686,11 @@ "docstring": { "type": "{'macro', 'weighted'}", "default_value": "'macro'", - "description": "Determines the type of averaging performed on the pairwise binary\nmetric scores:\n``'macro'``:\nCalculate metrics for each label, and find their unweighted\nmean. This does not take label imbalance into account. Classes\nare assumed to be uniformly distributed.\n``'weighted'``:\nCalculate metrics for each label, taking into account the\nprevalence of the classes." + "description": "Determines the type of averaging performed on the pairwise binary\nmetric scores:\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account. Classes\n are assumed to be uniformly distributed.\n``'weighted'``:\n Calculate metrics for each label, taking into account the\n prevalence of the classes." }, "type": { "kind": "EnumType", - "values": ["weighted", "macro"] + "values": ["macro", "weighted"] } } ], @@ -168399,7 +168399,7 @@ "docstring": { "type": "int or str", "default_value": "None", - "description": "Label of the positive class. `pos_label` will be inferred in the\nfollowing manner:\n\n* if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1;\n* else if `y_true` contains string, an error will be raised and\n`pos_label` should be explicitly specified;\n* otherwise, `pos_label` defaults to the greater label,\ni.e. `np.unique(y_true)[-1]`." + "description": "Label of the positive class. `pos_label` will be inferred in the\nfollowing manner:\n\n* if `y_true` in {-1, 1} or {0, 1}, `pos_label` defaults to 1;\n* else if `y_true` contains string, an error will be raised and\n `pos_label` should be explicitly specified;\n* otherwise, `pos_label` defaults to the greater label,\n i.e. `np.unique(y_true)[-1]`." 
}, "type": { "kind": "UnionType", @@ -168673,7 +168673,7 @@ }, "type": { "kind": "EnumType", - "values": ["quadratic", "linear"] + "values": ["linear", "quadratic"] } }, { @@ -168697,7 +168697,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Cohen's kappa: a statistic that measures inter-annotator agreement.\n\nThis function computes Cohen's kappa [1]_, a score that expresses the level\nof agreement between two annotators on a classification problem. It is\ndefined as\n\n.. math::\n\\kappa = (p_o - p_e) / (1 - p_e)\n\nwhere :math:`p_o` is the empirical probability of agreement on the label\nassigned to any sample (the observed agreement ratio), and :math:`p_e` is\nthe expected agreement when both annotators assign labels randomly.\n:math:`p_e` is estimated using a per-annotator empirical prior over the\nclass labels [2]_.\n\nRead more in the :ref:`User Guide `.", + "description": "Cohen's kappa: a statistic that measures inter-annotator agreement.\n\nThis function computes Cohen's kappa [1]_, a score that expresses the level\nof agreement between two annotators on a classification problem. It is\ndefined as\n\n.. math::\n \\kappa = (p_o - p_e) / (1 - p_e)\n\nwhere :math:`p_o` is the empirical probability of agreement on the label\nassigned to any sample (the observed agreement ratio), and :math:`p_e` is\nthe expected agreement when both annotators assign labels randomly.\n:math:`p_e` is estimated using a per-annotator empirical prior over the\nclass labels [2]_.\n\nRead more in the :ref:`User Guide `.", "docstring": "Cohen's kappa: a statistic that measures inter-annotator agreement.\n\nThis function computes Cohen's kappa [1]_, a score that expresses the level\nof agreement between two annotators on a classification problem. It is\ndefined as\n\n.. math::\n \\kappa = (p_o - p_e) / (1 - p_e)\n\nwhere :math:`p_o` is the empirical probability of agreement on the label\nassigned to any sample (the observed agreement ratio), and :math:`p_e` is\nthe expected agreement when both annotators assign labels randomly.\n:math:`p_e` is estimated using a per-annotator empirical prior over the\nclass labels [2]_.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny1 : array of shape (n_samples,)\n Labels assigned by the first annotator.\n\ny2 : array of shape (n_samples,)\n Labels assigned by the second annotator. The kappa statistic is\n symmetric, so swapping ``y1`` and ``y2`` doesn't change the value.\n\nlabels : array-like of shape (n_classes,), default=None\n List of labels to index the matrix. This may be used to select a\n subset of labels. If `None`, all labels that appear at least once in\n ``y1`` or ``y2`` are used.\n\nweights : {'linear', 'quadratic'}, default=None\n Weighting type to calculate the score. `None` means no weighted;\n \"linear\" means linear weighted; \"quadratic\" means quadratic weighted.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nkappa : float\n The kappa statistic, which is a number between -1 and 1. The maximum\n value means complete agreement; zero or lower means chance agreement.\n\nReferences\n----------\n.. [1] :doi:`J. Cohen (1960). \"A coefficient of agreement for nominal scales\".\n Educational and Psychological Measurement 20(1):37-46.\n <10.1177/001316446002000104>`\n.. [2] `R. Artstein and M. Poesio (2008). \"Inter-coder agreement for\n computational linguistics\". Computational Linguistics 34(4):555-596\n `_.\n.. 
[3] `Wikipedia entry for the Cohen's kappa\n `_." }, { @@ -168788,7 +168788,7 @@ }, "type": { "kind": "EnumType", - "values": ["all", "true", "pred"] + "values": ["pred", "true", "all"] } } ], @@ -168866,7 +168866,7 @@ "docstring": { "type": "array-like", "default_value": "None", - "description": "The set of labels to include when ``average != 'binary'``, and their\norder if ``average is None``. Labels present in the data can be\nexcluded, for example to calculate a multiclass average ignoring a\nmajority negative class, while labels not present in the data will\nresult in 0 components in a macro average. For multilabel targets,\nlabels are column indices. By default, all labels in ``y_true`` and\n``y_pred`` are used in sorted order.\n\n.. versionchanged:: 0.17\nParameter `labels` improved for multiclass problem." + "description": "The set of labels to include when ``average != 'binary'``, and their\norder if ``average is None``. Labels present in the data can be\nexcluded, for example to calculate a multiclass average ignoring a\nmajority negative class, while labels not present in the data will\nresult in 0 components in a macro average. For multilabel targets,\nlabels are column indices. By default, all labels in ``y_true`` and\n``y_pred`` are used in sorted order.\n\n.. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem." }, "type": { "kind": "NamedType", @@ -168909,14 +168909,14 @@ "docstring": { "type": "{'micro', 'macro', 'samples', 'weighted', 'binary'} or None", "default_value": "'binary'", - "description": "This parameter is required for multiclass/multilabel targets.\nIf ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\nOnly report results for the class specified by ``pos_label``.\nThis is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\nCalculate metrics globally by counting the total true positives,\nfalse negatives and false positives.\n``'macro'``:\nCalculate metrics for each label, and find their unweighted\nmean. This does not take label imbalance into account.\n``'weighted'``:\nCalculate metrics for each label, and find their average weighted\nby support (the number of true instances for each label). This\nalters 'macro' to account for label imbalance; it can result in an\nF-score that is not between precision and recall.\n``'samples'``:\nCalculate metrics for each instance, and find their average (only\nmeaningful for multilabel classification where this differs from\n:func:`accuracy_score`)." + "description": "This parameter is required for multiclass/multilabel targets.\nIf ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). 
This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`)." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["binary", "samples", "weighted", "macro", "micro"] + "values": ["samples", "weighted", "micro", "binary", "macro"] }, { "kind": "NamedType", @@ -168976,7 +168976,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Compute the F1 score, also known as balanced F-score or F-measure.\n\nThe F1 score can be interpreted as a harmonic mean of the precision and\nrecall, where an F1 score reaches its best value at 1 and worst score at 0.\nThe relative contribution of precision and recall to the F1 score are\nequal. The formula for the F1 score is::\n\nF1 = 2 * (precision * recall) / (precision + recall)\n\nIn the multi-class and multi-label case, this is the average of\nthe F1 score of each class with weighting depending on the ``average``\nparameter.\n\nRead more in the :ref:`User Guide `.", + "description": "Compute the F1 score, also known as balanced F-score or F-measure.\n\nThe F1 score can be interpreted as a harmonic mean of the precision and\nrecall, where an F1 score reaches its best value at 1 and worst score at 0.\nThe relative contribution of precision and recall to the F1 score are\nequal. The formula for the F1 score is::\n\n F1 = 2 * (precision * recall) / (precision + recall)\n\nIn the multi-class and multi-label case, this is the average of\nthe F1 score of each class with weighting depending on the ``average``\nparameter.\n\nRead more in the :ref:`User Guide `.", "docstring": "Compute the F1 score, also known as balanced F-score or F-measure.\n\nThe F1 score can be interpreted as a harmonic mean of the precision and\nrecall, where an F1 score reaches its best value at 1 and worst score at 0.\nThe relative contribution of precision and recall to the F1 score are\nequal. The formula for the F1 score is::\n\n F1 = 2 * (precision * recall) / (precision + recall)\n\nIn the multi-class and multi-label case, this is the average of\nthe F1 score of each class with weighting depending on the ``average``\nparameter.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : 1d array-like, or label indicator array / sparse matrix\n Ground truth (correct) target values.\n\ny_pred : 1d array-like, or label indicator array / sparse matrix\n Estimated targets as returned by a classifier.\n\nlabels : array-like, default=None\n The set of labels to include when ``average != 'binary'``, and their\n order if ``average is None``. Labels present in the data can be\n excluded, for example to calculate a multiclass average ignoring a\n majority negative class, while labels not present in the data will\n result in 0 components in a macro average. For multilabel targets,\n labels are column indices. By default, all labels in ``y_true`` and\n ``y_pred`` are used in sorted order.\n\n .. 
versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem.\n\npos_label : str or int, default=1\n The class to report if ``average='binary'`` and the data is binary.\n If the data are multiclass or multilabel, this will be ignored;\n setting ``labels=[pos_label]`` and ``average != 'binary'`` will report\n scores for that label only.\n\naverage : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None, default='binary'\n This parameter is required for multiclass/multilabel targets.\n If ``None``, the scores for each class are returned. Otherwise, this\n determines the type of averaging performed on the data:\n\n ``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n ``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n ``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nzero_division : \"warn\", 0 or 1, default=\"warn\"\n Sets the value to return when there is a zero division, i.e. when all\n predictions and labels are negative. If set to \"warn\", this acts as 0,\n but warnings are also raised.\n\nReturns\n-------\nf1_score : float or array of float, shape = [n_unique_labels]\n F1 score of the positive class in binary classification or weighted\n average of the F1 scores of each class for the multiclass task.\n\nSee Also\n--------\nfbeta_score : Compute the F-beta score.\nprecision_recall_fscore_support : Compute the precision, recall, F-score,\n and support.\njaccard_score : Compute the Jaccard similarity coefficient score.\nmultilabel_confusion_matrix : Compute a confusion matrix for each class or\n sample.\n\nNotes\n-----\nWhen ``true positive + false positive == 0``, precision is undefined.\nWhen ``true positive + false negative == 0``, recall is undefined.\nIn such cases, by default the metric will be set to 0, as will f-score,\nand ``UndefinedMetricWarning`` will be raised. This behavior can be\nmodified with ``zero_division``.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the F1-score\n `_.\n\nExamples\n--------\n>>> from sklearn.metrics import f1_score\n>>> y_true = [0, 1, 2, 0, 1, 2]\n>>> y_pred = [0, 2, 1, 0, 0, 1]\n>>> f1_score(y_true, y_pred, average='macro')\n0.26...\n>>> f1_score(y_true, y_pred, average='micro')\n0.33...\n>>> f1_score(y_true, y_pred, average='weighted')\n0.26...\n>>> f1_score(y_true, y_pred, average=None)\narray([0.8, 0. , 0. ])\n>>> y_true = [0, 0, 0, 0, 0, 0]\n>>> y_pred = [0, 0, 0, 0, 0, 0]\n>>> f1_score(y_true, y_pred, zero_division=1)\n1.0...\n>>> # multilabel classification\n>>> y_true = [[0, 0, 0], [1, 1, 1], [0, 1, 1]]\n>>> y_pred = [[0, 0, 0], [1, 1, 1], [1, 1, 0]]\n>>> f1_score(y_true, y_pred, average=None)\narray([0.66666667, 1. 
, 0.66666667])" }, { @@ -169064,7 +169064,7 @@ "docstring": { "type": "array-like", "default_value": "None", - "description": "The set of labels to include when ``average != 'binary'``, and their\norder if ``average is None``. Labels present in the data can be\nexcluded, for example to calculate a multiclass average ignoring a\nmajority negative class, while labels not present in the data will\nresult in 0 components in a macro average. For multilabel targets,\nlabels are column indices. By default, all labels in ``y_true`` and\n``y_pred`` are used in sorted order.\n\n.. versionchanged:: 0.17\nParameter `labels` improved for multiclass problem." + "description": "The set of labels to include when ``average != 'binary'``, and their\norder if ``average is None``. Labels present in the data can be\nexcluded, for example to calculate a multiclass average ignoring a\nmajority negative class, while labels not present in the data will\nresult in 0 components in a macro average. For multilabel targets,\nlabels are column indices. By default, all labels in ``y_true`` and\n``y_pred`` are used in sorted order.\n\n.. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem." }, "type": { "kind": "NamedType", @@ -169107,14 +169107,14 @@ "docstring": { "type": "{'micro', 'macro', 'samples', 'weighted', 'binary'} or None", "default_value": "'binary'", - "description": "This parameter is required for multiclass/multilabel targets.\nIf ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\nOnly report results for the class specified by ``pos_label``.\nThis is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\nCalculate metrics globally by counting the total true positives,\nfalse negatives and false positives.\n``'macro'``:\nCalculate metrics for each label, and find their unweighted\nmean. This does not take label imbalance into account.\n``'weighted'``:\nCalculate metrics for each label, and find their average weighted\nby support (the number of true instances for each label). This\nalters 'macro' to account for label imbalance; it can result in an\nF-score that is not between precision and recall.\n``'samples'``:\nCalculate metrics for each instance, and find their average (only\nmeaningful for multilabel classification where this differs from\n:func:`accuracy_score`)." + "description": "This parameter is required for multiclass/multilabel targets.\nIf ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`)." 
}, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["binary", "samples", "weighted", "macro", "micro"] + "values": ["samples", "weighted", "micro", "binary", "macro"] }, { "kind": "NamedType", @@ -169451,14 +169451,14 @@ "docstring": { "type": "{'micro', 'macro', 'samples', 'weighted', 'binary'} or None", "default_value": "'binary'", - "description": "If ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\nOnly report results for the class specified by ``pos_label``.\nThis is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\nCalculate metrics globally by counting the total true positives,\nfalse negatives and false positives.\n``'macro'``:\nCalculate metrics for each label, and find their unweighted\nmean. This does not take label imbalance into account.\n``'weighted'``:\nCalculate metrics for each label, and find their average, weighted\nby support (the number of true instances for each label). This\nalters 'macro' to account for label imbalance.\n``'samples'``:\nCalculate metrics for each instance, and find their average (only\nmeaningful for multilabel classification)." + "description": "If ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance.\n``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification)." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["binary", "samples", "weighted", "macro", "micro"] + "values": ["samples", "weighted", "micro", "binary", "macro"] }, { "kind": "NamedType", @@ -169647,7 +169647,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Log loss, aka logistic loss or cross-entropy loss.\n\nThis is the loss function used in (multinomial) logistic regression\nand extensions of it such as neural networks, defined as the negative\nlog-likelihood of a logistic model that returns ``y_pred`` probabilities\nfor its training data ``y_true``.\nThe log loss is only defined for two or more labels.\nFor a single sample with true label :math:`y \\in \\{0,1\\}` and\na probability estimate :math:`p = \\operatorname{Pr}(y = 1)`, the log\nloss is:\n\n.. 
math::\nL_{\\log}(y, p) = -(y \\log (p) + (1 - y) \\log (1 - p))\n\nRead more in the :ref:`User Guide `.", + "description": "Log loss, aka logistic loss or cross-entropy loss.\n\nThis is the loss function used in (multinomial) logistic regression\nand extensions of it such as neural networks, defined as the negative\nlog-likelihood of a logistic model that returns ``y_pred`` probabilities\nfor its training data ``y_true``.\nThe log loss is only defined for two or more labels.\nFor a single sample with true label :math:`y \\in \\{0,1\\}` and\na probability estimate :math:`p = \\operatorname{Pr}(y = 1)`, the log\nloss is:\n\n.. math::\n L_{\\log}(y, p) = -(y \\log (p) + (1 - y) \\log (1 - p))\n\nRead more in the :ref:`User Guide `.", "docstring": "Log loss, aka logistic loss or cross-entropy loss.\n\nThis is the loss function used in (multinomial) logistic regression\nand extensions of it such as neural networks, defined as the negative\nlog-likelihood of a logistic model that returns ``y_pred`` probabilities\nfor its training data ``y_true``.\nThe log loss is only defined for two or more labels.\nFor a single sample with true label :math:`y \\in \\{0,1\\}` and\na probability estimate :math:`p = \\operatorname{Pr}(y = 1)`, the log\nloss is:\n\n.. math::\n L_{\\log}(y, p) = -(y \\log (p) + (1 - y) \\log (1 - p))\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like or label indicator matrix\n Ground truth (correct) labels for n_samples samples.\n\ny_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)\n Predicted probabilities, as returned by a classifier's\n predict_proba method. If ``y_pred.shape = (n_samples,)``\n the probabilities provided are assumed to be that of the\n positive class. The labels in ``y_pred`` are assumed to be\n ordered alphabetically, as done by\n :class:`preprocessing.LabelBinarizer`.\n\neps : float, default=1e-15\n Log loss is undefined for p=0 or p=1, so probabilities are\n clipped to max(eps, min(1 - eps, p)).\n\nnormalize : bool, default=True\n If true, return the mean loss per sample.\n Otherwise, return the sum of the per-sample losses.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nlabels : array-like, default=None\n If not provided, labels will be inferred from y_true. If ``labels``\n is ``None`` and ``y_pred`` has shape (n_samples,) the labels are\n assumed to be binary and are inferred from ``y_true``.\n\n .. versionadded:: 0.18\n\nReturns\n-------\nloss : float\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e).\n\nExamples\n--------\n>>> from sklearn.metrics import log_loss\n>>> log_loss([\"spam\", \"ham\", \"ham\", \"spam\"],\n... [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])\n0.21616...\n\nReferences\n----------\nC.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,\np. 209." }, { @@ -169976,11 +169976,11 @@ "docstring": { "type": "{'binary', 'micro', 'macro', 'samples', 'weighted'}", "default_value": "None", - "description": "If ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\nOnly report results for the class specified by ``pos_label``.\nThis is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\nCalculate metrics globally by counting the total true positives,\nfalse negatives and false positives.\n``'macro'``:\nCalculate metrics for each label, and find their unweighted\nmean. 
This does not take label imbalance into account.\n``'weighted'``:\nCalculate metrics for each label, and find their average weighted\nby support (the number of true instances for each label). This\nalters 'macro' to account for label imbalance; it can result in an\nF-score that is not between precision and recall.\n``'samples'``:\nCalculate metrics for each instance, and find their average (only\nmeaningful for multilabel classification where this differs from\n:func:`accuracy_score`)." + "description": "If ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`)." }, "type": { "kind": "EnumType", - "values": ["binary", "samples", "weighted", "macro", "micro"] + "values": ["samples", "weighted", "micro", "binary", "macro"] } }, { @@ -170040,7 +170040,7 @@ "docstring": { "type": "\"warn\", 0 or 1", "default_value": "\"warn\"", - "description": "Sets the value to return when there is a zero division:\n- recall: when there are no positive labels\n- precision: when there are no positive predictions\n- f-score: both\n\nIf set to \"warn\", this acts as 0, but warnings are also raised." + "description": "Sets the value to return when there is a zero division:\n - recall: when there are no positive labels\n - precision: when there are no positive predictions\n - f-score: both\n\nIf set to \"warn\", this acts as 0, but warnings are also raised." }, "type": { "kind": "UnionType", @@ -170135,7 +170135,7 @@ "docstring": { "type": "array-like", "default_value": "None", - "description": "The set of labels to include when ``average != 'binary'``, and their\norder if ``average is None``. Labels present in the data can be\nexcluded, for example to calculate a multiclass average ignoring a\nmajority negative class, while labels not present in the data will\nresult in 0 components in a macro average. For multilabel targets,\nlabels are column indices. By default, all labels in ``y_true`` and\n``y_pred`` are used in sorted order.\n\n.. versionchanged:: 0.17\nParameter `labels` improved for multiclass problem." + "description": "The set of labels to include when ``average != 'binary'``, and their\norder if ``average is None``. Labels present in the data can be\nexcluded, for example to calculate a multiclass average ignoring a\nmajority negative class, while labels not present in the data will\nresult in 0 components in a macro average. For multilabel targets,\nlabels are column indices. By default, all labels in ``y_true`` and\n``y_pred`` are used in sorted order.\n\n.. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem." 
}, "type": { "kind": "NamedType", @@ -170178,14 +170178,14 @@ "docstring": { "type": "{'micro', 'macro', 'samples', 'weighted', 'binary'} or None", "default_value": "'binary'", - "description": "This parameter is required for multiclass/multilabel targets.\nIf ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\nOnly report results for the class specified by ``pos_label``.\nThis is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\nCalculate metrics globally by counting the total true positives,\nfalse negatives and false positives.\n``'macro'``:\nCalculate metrics for each label, and find their unweighted\nmean. This does not take label imbalance into account.\n``'weighted'``:\nCalculate metrics for each label, and find their average weighted\nby support (the number of true instances for each label). This\nalters 'macro' to account for label imbalance; it can result in an\nF-score that is not between precision and recall.\n``'samples'``:\nCalculate metrics for each instance, and find their average (only\nmeaningful for multilabel classification where this differs from\n:func:`accuracy_score`)." + "description": "This parameter is required for multiclass/multilabel targets.\nIf ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall.\n``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`)." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["binary", "samples", "weighted", "macro", "micro"] + "values": ["samples", "weighted", "micro", "binary", "macro"] }, { "kind": "NamedType", @@ -170316,7 +170316,7 @@ "docstring": { "type": "array-like", "default_value": "None", - "description": "The set of labels to include when ``average != 'binary'``, and their\norder if ``average is None``. Labels present in the data can be\nexcluded, for example to calculate a multiclass average ignoring a\nmajority negative class, while labels not present in the data will\nresult in 0 components in a macro average. For multilabel targets,\nlabels are column indices. By default, all labels in ``y_true`` and\n``y_pred`` are used in sorted order.\n\n.. versionchanged:: 0.17\nParameter `labels` improved for multiclass problem." + "description": "The set of labels to include when ``average != 'binary'``, and their\norder if ``average is None``. Labels present in the data can be\nexcluded, for example to calculate a multiclass average ignoring a\nmajority negative class, while labels not present in the data will\nresult in 0 components in a macro average. For multilabel targets,\nlabels are column indices. 
By default, all labels in ``y_true`` and\n``y_pred`` are used in sorted order.\n\n.. versionchanged:: 0.17\n Parameter `labels` improved for multiclass problem." }, "type": { "kind": "NamedType", @@ -170359,14 +170359,14 @@ "docstring": { "type": "{'micro', 'macro', 'samples', 'weighted', 'binary'} or None", "default_value": "'binary'", - "description": "This parameter is required for multiclass/multilabel targets.\nIf ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\nOnly report results for the class specified by ``pos_label``.\nThis is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\nCalculate metrics globally by counting the total true positives,\nfalse negatives and false positives.\n``'macro'``:\nCalculate metrics for each label, and find their unweighted\nmean. This does not take label imbalance into account.\n``'weighted'``:\nCalculate metrics for each label, and find their average weighted\nby support (the number of true instances for each label). This\nalters 'macro' to account for label imbalance; it can result in an\nF-score that is not between precision and recall. Weighted recall\nis equal to accuracy.\n``'samples'``:\nCalculate metrics for each instance, and find their average (only\nmeaningful for multilabel classification where this differs from\n:func:`accuracy_score`)." + "description": "This parameter is required for multiclass/multilabel targets.\nIf ``None``, the scores for each class are returned. Otherwise, this\ndetermines the type of averaging performed on the data:\n\n``'binary'``:\n Only report results for the class specified by ``pos_label``.\n This is applicable only if targets (``y_{true,pred}``) are binary.\n``'micro'``:\n Calculate metrics globally by counting the total true positives,\n false negatives and false positives.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average weighted\n by support (the number of true instances for each label). This\n alters 'macro' to account for label imbalance; it can result in an\n F-score that is not between precision and recall. Weighted recall\n is equal to accuracy.\n``'samples'``:\n Calculate metrics for each instance, and find their average (only\n meaningful for multilabel classification where this differs from\n :func:`accuracy_score`)." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["binary", "samples", "weighted", "macro", "micro"] + "values": ["samples", "weighted", "micro", "binary", "macro"] }, { "kind": "NamedType", @@ -170850,11 +170850,11 @@ "docstring": { "type": "{'true', 'pred', 'all'}", "default_value": "None", - "description": "Either to normalize the counts display in the matrix:\n\n- if `'true'`, the confusion matrix is normalized over the true\nconditions (e.g. rows);\n- if `'pred'`, the confusion matrix is normalized over the\npredicted conditions (e.g. columns);\n- if `'all'`, the confusion matrix is normalized by the total\nnumber of samples;\n- if `None` (default), the confusion matrix will not be normalized." + "description": "Either to normalize the counts display in the matrix:\n\n- if `'true'`, the confusion matrix is normalized over the true\n conditions (e.g. rows);\n- if `'pred'`, the confusion matrix is normalized over the\n predicted conditions (e.g. 
columns);\n- if `'all'`, the confusion matrix is normalized by the total\n number of samples;\n- if `None` (default), the confusion matrix will not be normalized." }, "type": { "kind": "EnumType", - "values": ["all", "true", "pred"] + "values": ["pred", "true", "all"] } }, { @@ -170908,7 +170908,7 @@ "types": [ { "kind": "EnumType", - "values": ["horizontal", "vertical"] + "values": ["vertical", "horizontal"] }, { "kind": "NamedType", @@ -171116,11 +171116,11 @@ "docstring": { "type": "{'true', 'pred', 'all'}", "default_value": "None", - "description": "Either to normalize the counts display in the matrix:\n\n- if `'true'`, the confusion matrix is normalized over the true\nconditions (e.g. rows);\n- if `'pred'`, the confusion matrix is normalized over the\npredicted conditions (e.g. columns);\n- if `'all'`, the confusion matrix is normalized by the total\nnumber of samples;\n- if `None` (default), the confusion matrix will not be normalized." + "description": "Either to normalize the counts display in the matrix:\n\n- if `'true'`, the confusion matrix is normalized over the true\n conditions (e.g. rows);\n- if `'pred'`, the confusion matrix is normalized over the\n predicted conditions (e.g. columns);\n- if `'all'`, the confusion matrix is normalized by the total\n number of samples;\n- if `None` (default), the confusion matrix will not be normalized." }, "type": { "kind": "EnumType", - "values": ["all", "true", "pred"] + "values": ["pred", "true", "all"] } }, { @@ -171174,7 +171174,7 @@ "types": [ { "kind": "EnumType", - "values": ["horizontal", "vertical"] + "values": ["vertical", "horizontal"] }, { "kind": "NamedType", @@ -171364,7 +171364,7 @@ "types": [ { "kind": "EnumType", - "values": ["horizontal", "vertical"] + "values": ["vertical", "horizontal"] }, { "kind": "NamedType", @@ -171560,11 +171560,11 @@ "docstring": { "type": "{'true', 'pred', 'all'}", "default_value": "None", - "description": "Either to normalize the counts display in the matrix:\n\n- if `'true'`, the confusion matrix is normalized over the true\nconditions (e.g. rows);\n- if `'pred'`, the confusion matrix is normalized over the\npredicted conditions (e.g. columns);\n- if `'all'`, the confusion matrix is normalized by the total\nnumber of samples;\n- if `None` (default), the confusion matrix will not be normalized." + "description": "Either to normalize the counts display in the matrix:\n\n - if `'true'`, the confusion matrix is normalized over the true\n conditions (e.g. rows);\n - if `'pred'`, the confusion matrix is normalized over the\n predicted conditions (e.g. columns);\n - if `'all'`, the confusion matrix is normalized by the total\n number of samples;\n - if `None` (default), the confusion matrix will not be normalized." 
}, "type": { "kind": "EnumType", - "values": ["all", "true", "pred"] + "values": ["pred", "true", "all"] } }, { @@ -171618,7 +171618,7 @@ "types": [ { "kind": "EnumType", - "values": ["horizontal", "vertical"] + "values": ["vertical", "horizontal"] }, { "kind": "NamedType", @@ -171926,7 +171926,7 @@ }, "type": { "kind": "EnumType", - "values": ["auto", "predict_proba", "decision_function"] + "values": ["decision_function", "predict_proba", "auto"] } }, { @@ -172345,7 +172345,7 @@ }, "type": { "kind": "EnumType", - "values": ["auto", "predict_proba", "decision_function"] + "values": ["decision_function", "predict_proba", "auto"] } }, { @@ -172429,7 +172429,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Plot detection error tradeoff (DET) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\n.. deprecated:: 1.0\n`plot_det_curve` is deprecated in 1.0 and will be removed in\n1.2. Use one of the following class methods:\n:func:`~sklearn.metrics.DetCurveDisplay.from_predictions` or\n:func:`~sklearn.metrics.DetCurveDisplay.from_estimator`.", + "description": "Plot detection error tradeoff (DET) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\n.. deprecated:: 1.0\n `plot_det_curve` is deprecated in 1.0 and will be removed in\n 1.2. Use one of the following class methods:\n :func:`~sklearn.metrics.DetCurveDisplay.from_predictions` or\n :func:`~sklearn.metrics.DetCurveDisplay.from_estimator`.", "docstring": "Plot detection error tradeoff (DET) curve.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\n.. deprecated:: 1.0\n `plot_det_curve` is deprecated in 1.0 and will be removed in\n 1.2. Use one of the following class methods:\n :func:`~sklearn.metrics.DetCurveDisplay.from_predictions` or\n :func:`~sklearn.metrics.DetCurveDisplay.from_estimator`.\n\nParameters\n----------\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\ny : array-like of shape (n_samples,)\n Target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'} default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the predicted target response. If set to\n 'auto', :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\nname : str, default=None\n Name of DET curve for labeling. If `None`, use the name of the\n estimator.\n\nax : matplotlib axes, default=None\n Axes object to plot on. 
If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n The label of the positive class.\n When `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1},\n `pos_label` is set to 1, otherwise an error will be raised.\n\n**kwargs : dict\n Additional keywords arguments passed to matplotlib `plot` function.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.DetCurveDisplay`\n Object that stores computed values.\n\nSee Also\n--------\ndet_curve : Compute error rates for different probability thresholds.\nDetCurveDisplay : DET curve visualization.\nDetCurveDisplay.from_estimator : Plot DET curve given an estimator and\n some data.\nDetCurveDisplay.from_predictions : Plot DET curve given the true and\n predicted labels.\nRocCurveDisplay.from_estimator : Plot Receiver Operating Characteristic\n (ROC) curve given an estimator and some data.\nRocCurveDisplay.from_predictions : Plot Receiver Operating Characteristic\n (ROC) curve given the true and predicted values.\n\nExamples\n--------\n>>> import matplotlib.pyplot as plt\n>>> from sklearn.datasets import make_classification\n>>> from sklearn.metrics import plot_det_curve\n>>> from sklearn.model_selection import train_test_split\n>>> from sklearn.svm import SVC\n>>> X, y = make_classification(n_samples=1000, random_state=0)\n>>> X_train, X_test, y_train, y_test = train_test_split(\n... X, y, test_size=0.4, random_state=0)\n>>> clf = SVC(random_state=0).fit(X_train, y_train)\n>>> plot_det_curve(clf, X_test, y_test) # doctest: +SKIP\n<...>\n>>> plt.show()" }, { @@ -172690,7 +172690,7 @@ }, "type": { "kind": "EnumType", - "values": ["auto", "predict_proba", "decision_function"] + "values": ["decision_function", "predict_proba", "auto"] } }, { @@ -173083,7 +173083,7 @@ }, "type": { "kind": "EnumType", - "values": ["auto", "predict_proba", "decision_function"] + "values": ["decision_function", "predict_proba", "auto"] } }, { @@ -173167,7 +173167,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Plot Precision Recall Curve for binary classifiers.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\n.. deprecated:: 1.0\n`plot_precision_recall_curve` is deprecated in 1.0 and will be removed in\n1.2. Use one of the following class methods:\n:func:`~sklearn.metrics.PrecisionRecallDisplay.from_predictions` or\n:func:`~sklearn.metrics.PrecisionRecallDisplay.from_estimator`.", + "description": "Plot Precision Recall Curve for binary classifiers.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\n.. deprecated:: 1.0\n `plot_precision_recall_curve` is deprecated in 1.0 and will be removed in\n 1.2. Use one of the following class methods:\n :func:`~sklearn.metrics.PrecisionRecallDisplay.from_predictions` or\n :func:`~sklearn.metrics.PrecisionRecallDisplay.from_estimator`.", "docstring": "Plot Precision Recall Curve for binary classifiers.\n\nExtra keyword arguments will be passed to matplotlib's `plot`.\n\nRead more in the :ref:`User Guide `.\n\n.. deprecated:: 1.0\n `plot_precision_recall_curve` is deprecated in 1.0 and will be removed in\n 1.2. 
Use one of the following class methods:\n :func:`~sklearn.metrics.PrecisionRecallDisplay.from_predictions` or\n :func:`~sklearn.metrics.PrecisionRecallDisplay.from_estimator`.\n\nParameters\n----------\nestimator : estimator instance\n Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`\n in which the last estimator is a classifier.\n\nX : {array-like, sparse matrix} of shape (n_samples, n_features)\n Input values.\n\ny : array-like of shape (n_samples,)\n Binary target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nresponse_method : {'predict_proba', 'decision_function', 'auto'}, default='auto'\n Specifies whether to use :term:`predict_proba` or\n :term:`decision_function` as the target response. If set to 'auto',\n :term:`predict_proba` is tried first and if it does not exist\n :term:`decision_function` is tried next.\n\nname : str, default=None\n Name for labeling curve. If `None`, the name of the\n estimator is used.\n\nax : matplotlib axes, default=None\n Axes object to plot on. If `None`, a new figure and axes is created.\n\npos_label : str or int, default=None\n The class considered as the positive class when computing the precision\n and recall metrics. By default, `estimators.classes_[1]` is considered\n as the positive class.\n\n .. versionadded:: 0.24\n\n**kwargs : dict\n Keyword arguments to be passed to matplotlib's `plot`.\n\nReturns\n-------\ndisplay : :class:`~sklearn.metrics.PrecisionRecallDisplay`\n Object that stores computed values.\n\nSee Also\n--------\nprecision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\nPrecisionRecallDisplay : Precision Recall visualization." }, { @@ -173419,7 +173419,7 @@ }, "type": { "kind": "EnumType", - "values": ["auto", "predict_proba", "decision_function"] + "values": ["decision_function", "predict_proba", "auto"] } }, { @@ -173869,7 +173869,7 @@ }, "type": { "kind": "EnumType", - "values": ["auto", "predict_proba", "decision_function"] + "values": ["decision_function", "predict_proba", "auto"] } }, { @@ -174316,7 +174316,7 @@ "docstring": { "type": "{'ovr', 'ovo'}", "default_value": "", - "description": "Determines the type of multiclass configuration to use.\n``'ovr'``:\nCalculate metrics for the multiclass case using the one-vs-rest\napproach.\n``'ovo'``:\nCalculate metrics for the multiclass case using the one-vs-one\napproach." + "description": "Determines the type of multiclass configuration to use.\n``'ovr'``:\n Calculate metrics for the multiclass case using the one-vs-rest\n approach.\n``'ovo'``:\n Calculate metrics for the multiclass case using the one-vs-one\n approach." }, "type": { "kind": "EnumType", @@ -174333,11 +174333,11 @@ "docstring": { "type": "{'macro', 'weighted'}", "default_value": "", - "description": "Determines the type of averaging performed on the pairwise binary\nmetric scores\n``'macro'``:\nCalculate metrics for each label, and find their unweighted\nmean. This does not take label imbalance into account. Classes\nare assumed to be uniformly distributed.\n``'weighted'``:\nCalculate metrics for each label, taking into account the\nprevalence of the classes." + "description": "Determines the type of averaging performed on the pairwise binary\nmetric scores\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account. 
Classes\n are assumed to be uniformly distributed.\n``'weighted'``:\n Calculate metrics for each label, taking into account the\n prevalence of the classes." }, "type": { "kind": "EnumType", - "values": ["weighted", "macro"] + "values": ["macro", "weighted"] } }, { @@ -174624,14 +174624,14 @@ "docstring": { "type": "{'micro', 'samples', 'weighted', 'macro'} or None", "default_value": "'macro'", - "description": "If ``None``, the scores for each class are returned. Otherwise,\nthis determines the type of averaging performed on the data:\n\n``'micro'``:\nCalculate metrics globally by considering each element of the label\nindicator matrix as a label.\n``'macro'``:\nCalculate metrics for each label, and find their unweighted\nmean. This does not take label imbalance into account.\n``'weighted'``:\nCalculate metrics for each label, and find their average, weighted\nby support (the number of true instances for each label).\n``'samples'``:\nCalculate metrics for each instance, and find their average.\n\nWill be ignored when ``y_true`` is binary." + "description": "If ``None``, the scores for each class are returned. Otherwise,\nthis determines the type of averaging performed on the data:\n\n``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n``'samples'``:\n Calculate metrics for each instance, and find their average.\n\nWill be ignored when ``y_true`` is binary." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["macro", "weighted", "micro", "samples"] + "values": ["samples", "macro", "micro", "weighted"] }, { "kind": "NamedType", @@ -174687,7 +174687,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Compute average precision (AP) from prediction scores.\n\nAP summarizes a precision-recall curve as the weighted mean of precisions\nachieved at each threshold, with the increase in recall from the previous\nthreshold used as the weight:\n\n.. math::\n\\text{AP} = \\sum_n (R_n - R_{n-1}) P_n\n\nwhere :math:`P_n` and :math:`R_n` are the precision and recall at the nth\nthreshold [1]_. This implementation is not interpolated and is different\nfrom computing the area under the precision-recall curve with the\ntrapezoidal rule, which uses linear interpolation and can be too\noptimistic.\n\nNote: this implementation is restricted to the binary classification task\nor multilabel classification task.\n\nRead more in the :ref:`User Guide `.", + "description": "Compute average precision (AP) from prediction scores.\n\nAP summarizes a precision-recall curve as the weighted mean of precisions\nachieved at each threshold, with the increase in recall from the previous\nthreshold used as the weight:\n\n.. math::\n \\text{AP} = \\sum_n (R_n - R_{n-1}) P_n\n\nwhere :math:`P_n` and :math:`R_n` are the precision and recall at the nth\nthreshold [1]_. 
This implementation is not interpolated and is different\nfrom computing the area under the precision-recall curve with the\ntrapezoidal rule, which uses linear interpolation and can be too\noptimistic.\n\nNote: this implementation is restricted to the binary classification task\nor multilabel classification task.\n\nRead more in the :ref:`User Guide `.", "docstring": "Compute average precision (AP) from prediction scores.\n\nAP summarizes a precision-recall curve as the weighted mean of precisions\nachieved at each threshold, with the increase in recall from the previous\nthreshold used as the weight:\n\n.. math::\n \\text{AP} = \\sum_n (R_n - R_{n-1}) P_n\n\nwhere :math:`P_n` and :math:`R_n` are the precision and recall at the nth\nthreshold [1]_. This implementation is not interpolated and is different\nfrom computing the area under the precision-recall curve with the\ntrapezoidal rule, which uses linear interpolation and can be too\noptimistic.\n\nNote: this implementation is restricted to the binary classification task\nor multilabel classification task.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,) or (n_samples, n_classes)\n True binary labels or binary label indicators.\n\ny_score : ndarray of shape (n_samples,) or (n_samples, n_classes)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by :term:`decision_function` on some classifiers).\n\naverage : {'micro', 'samples', 'weighted', 'macro'} or None, default='macro'\n If ``None``, the scores for each class are returned. Otherwise,\n this determines the type of averaging performed on the data:\n\n ``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n ``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n ``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n ``'samples'``:\n Calculate metrics for each instance, and find their average.\n\n Will be ignored when ``y_true`` is binary.\n\npos_label : int or str, default=1\n The label of the positive class. Only applied to binary ``y_true``.\n For multilabel-indicator ``y_true``, ``pos_label`` is fixed to 1.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\naverage_precision : float\n Average precision score.\n\nSee Also\n--------\nroc_auc_score : Compute the area under the ROC curve.\nprecision_recall_curve : Compute precision-recall pairs for different\n probability thresholds.\n\nNotes\n-----\n.. versionchanged:: 0.19\n Instead of linearly interpolating between operating points, precisions\n are weighted by the change in recall since the last operating point.\n\nReferences\n----------\n.. [1] `Wikipedia entry for the Average precision\n `_\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import average_precision_score\n>>> y_true = np.array([0, 0, 1, 1])\n>>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> average_precision_score(y_true, y_scores)\n0.83..." }, { @@ -174956,7 +174956,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Compute error rates for different probability thresholds.\n\n.. 
note::\nThis metric is used for evaluation of ranking and error tradeoffs of\na binary classification task.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24", + "description": "Compute error rates for different probability thresholds.\n\n.. note::\n This metric is used for evaluation of ranking and error tradeoffs of\n a binary classification task.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24", "docstring": "Compute error rates for different probability thresholds.\n\n.. note::\n This metric is used for evaluation of ranking and error tradeoffs of\n a binary classification task.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.24\n\nParameters\n----------\ny_true : ndarray of shape (n_samples,)\n True binary labels. If labels are not either {-1, 1} or {0, 1}, then\n pos_label should be explicitly given.\n\ny_score : ndarray of shape of (n_samples,)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\npos_label : int or str, default=None\n The label of the positive class.\n When ``pos_label=None``, if `y_true` is in {-1, 1} or {0, 1},\n ``pos_label`` is set to 1, otherwise an error will be raised.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nfpr : ndarray of shape (n_thresholds,)\n False positive rate (FPR) such that element i is the false positive\n rate of predictions with score >= thresholds[i]. This is occasionally\n referred to as false acceptance propability or fall-out.\n\nfnr : ndarray of shape (n_thresholds,)\n False negative rate (FNR) such that element i is the false negative\n rate of predictions with score >= thresholds[i]. This is occasionally\n referred to as false rejection or miss rate.\n\nthresholds : ndarray of shape (n_thresholds,)\n Decreasing score values.\n\nSee Also\n--------\nDetCurveDisplay.from_estimator : Plot DET curve given an estimator and\n some data.\nDetCurveDisplay.from_predictions : Plot DET curve given the true and\n predicted labels.\nDetCurveDisplay : DET curve visualization.\nroc_curve : Compute Receiver operating characteristic (ROC) curve.\nprecision_recall_curve : Compute precision-recall curve.\n\nExamples\n--------\n>>> import numpy as np\n>>> from sklearn.metrics import det_curve\n>>> y_true = np.array([0, 0, 1, 1])\n>>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])\n>>> fpr, fnr, thresholds = det_curve(y_true, y_scores)\n>>> fpr\narray([0.5, 0.5, 0. ])\n>>> fnr\narray([0. , 0.5, 0.5])\n>>> thresholds\narray([0.35, 0.4 , 0.8 ])" }, { @@ -175102,7 +175102,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Compute Ranking loss measure.\n\nCompute the average number of label pairs that are incorrectly ordered\ngiven y_score weighted by the size of the label set and the number of\nlabels not in the label set.\n\nThis is similar to the error set size, but weighted by the number of\nrelevant and irrelevant labels. The best performance is achieved with\na ranking loss of zero.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.17\nA function *label_ranking_loss*", + "description": "Compute Ranking loss measure.\n\nCompute the average number of label pairs that are incorrectly ordered\ngiven y_score weighted by the size of the label set and the number of\nlabels not in the label set.\n\nThis is similar to the error set size, but weighted by the number of\nrelevant and irrelevant labels. The best performance is achieved with\na ranking loss of zero.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n A function *label_ranking_loss*", "docstring": "Compute Ranking loss measure.\n\nCompute the average number of label pairs that are incorrectly ordered\ngiven y_score weighted by the size of the label set and the number of\nlabels not in the label set.\n\nThis is similar to the error set size, but weighted by the number of\nrelevant and irrelevant labels. The best performance is achieved with\na ranking loss of zero.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n A function *label_ranking_loss*\n\nParameters\n----------\ny_true : {ndarray, sparse matrix} of shape (n_samples, n_labels)\n True binary labels in binary indicator format.\n\ny_score : ndarray of shape (n_samples, n_labels)\n Target scores, can either be probability estimates of the positive\n class, confidence values, or non-thresholded measure of decisions\n (as returned by \"decision_function\" on some classifiers).\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nReturns\n-------\nloss : float\n Average number of label pairs that are incorrectly ordered given\n y_score weighted by the size of the label set and the number of labels not\n in the label set.\n\nReferences\n----------\n.. [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010).\n Mining multi-label data. In Data mining and knowledge discovery\n handbook (pp. 667-685). Springer US." }, { @@ -175326,7 +175326,7 @@ "docstring": { "type": "array-like of shape (n_samples,) or (n_samples, n_classes)", "default_value": "", - "description": "Target scores.\n\n* In the binary case, it corresponds to an array of shape\n`(n_samples,)`. Both probability estimates and non-thresholded\ndecision values can be provided. The probability estimates correspond\nto the **probability of the class with the greater label**,\ni.e. `estimator.classes_[1]` and thus\n`estimator.predict_proba(X, y)[:, 1]`. The decision values\ncorresponds to the output of `estimator.decision_function(X, y)`.\nSee more information in the :ref:`User guide `;\n* In the multiclass case, it corresponds to an array of shape\n`(n_samples, n_classes)` of probability estimates provided by the\n`predict_proba` method. The probability estimates **must**\nsum to 1 across the possible classes. In addition, the order of the\nclass scores must correspond to the order of ``labels``,\nif provided, or else to the numerical or lexicographical order of\nthe labels in ``y_true``. See more information in the\n:ref:`User guide `;\n* In the multilabel case, it corresponds to an array of shape\n`(n_samples, n_classes)`. Probability estimates are provided by the\n`predict_proba` method and the non-thresholded decision values by\nthe `decision_function` method. The probability estimates correspond\nto the **probability of the class with the greater label for each\noutput** of the classifier. See more information in the\n:ref:`User guide `." + "description": "Target scores.\n\n* In the binary case, it corresponds to an array of shape\n `(n_samples,)`. 
Both probability estimates and non-thresholded\n decision values can be provided. The probability estimates correspond\n to the **probability of the class with the greater label**,\n i.e. `estimator.classes_[1]` and thus\n `estimator.predict_proba(X, y)[:, 1]`. The decision values\n corresponds to the output of `estimator.decision_function(X, y)`.\n See more information in the :ref:`User guide `;\n* In the multiclass case, it corresponds to an array of shape\n `(n_samples, n_classes)` of probability estimates provided by the\n `predict_proba` method. The probability estimates **must**\n sum to 1 across the possible classes. In addition, the order of the\n class scores must correspond to the order of ``labels``,\n if provided, or else to the numerical or lexicographical order of\n the labels in ``y_true``. See more information in the\n :ref:`User guide `;\n* In the multilabel case, it corresponds to an array of shape\n `(n_samples, n_classes)`. Probability estimates are provided by the\n `predict_proba` method and the non-thresholded decision values by\n the `decision_function` method. The probability estimates correspond\n to the **probability of the class with the greater label for each\n output** of the classifier. See more information in the\n :ref:`User guide `." }, "type": { "kind": "NamedType", @@ -175343,14 +175343,14 @@ "docstring": { "type": "{'micro', 'macro', 'samples', 'weighted'} or None", "default_value": "'macro'", - "description": "If ``None``, the scores for each class are returned.\nOtherwise, this determines the type of averaging performed on the data.\nNote: multiclass ROC AUC currently only handles the 'macro' and\n'weighted' averages. For multiclass targets, `average=None`\nis only implemented for `multi_class='ovo'`.\n\n``'micro'``:\nCalculate metrics globally by considering each element of the label\nindicator matrix as a label.\n``'macro'``:\nCalculate metrics for each label, and find their unweighted\nmean. This does not take label imbalance into account.\n``'weighted'``:\nCalculate metrics for each label, and find their average, weighted\nby support (the number of true instances for each label).\n``'samples'``:\nCalculate metrics for each instance, and find their average.\n\nWill be ignored when ``y_true`` is binary." + "description": "If ``None``, the scores for each class are returned.\nOtherwise, this determines the type of averaging performed on the data.\nNote: multiclass ROC AUC currently only handles the 'macro' and\n'weighted' averages. For multiclass targets, `average=None`\nis only implemented for `multi_class='ovo'`.\n\n``'micro'``:\n Calculate metrics globally by considering each element of the label\n indicator matrix as a label.\n``'macro'``:\n Calculate metrics for each label, and find their unweighted\n mean. This does not take label imbalance into account.\n``'weighted'``:\n Calculate metrics for each label, and find their average, weighted\n by support (the number of true instances for each label).\n``'samples'``:\n Calculate metrics for each instance, and find their average.\n\nWill be ignored when ``y_true`` is binary." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["weighted", "samples", "micro", "macro"] + "values": ["samples", "macro", "micro", "weighted"] }, { "kind": "NamedType", @@ -175403,7 +175403,7 @@ "docstring": { "type": "{'raise', 'ovr', 'ovo'}", "default_value": "'raise'", - "description": "Only used for multiclass targets. Determines the type of configuration\nto use. 
The default value raises an error, so either\n``'ovr'`` or ``'ovo'`` must be passed explicitly.\n\n``'ovr'``:\nStands for One-vs-rest. Computes the AUC of each class\nagainst the rest [3]_ [4]_. This\ntreats the multiclass case in the same way as the multilabel case.\nSensitive to class imbalance even when ``average == 'macro'``,\nbecause class imbalance affects the composition of each of the\n'rest' groupings.\n``'ovo'``:\nStands for One-vs-one. Computes the average AUC of all\npossible pairwise combinations of classes [5]_.\nInsensitive to class imbalance when\n``average == 'macro'``." + "description": "Only used for multiclass targets. Determines the type of configuration\nto use. The default value raises an error, so either\n``'ovr'`` or ``'ovo'`` must be passed explicitly.\n\n``'ovr'``:\n Stands for One-vs-rest. Computes the AUC of each class\n against the rest [3]_ [4]_. This\n treats the multiclass case in the same way as the multilabel case.\n Sensitive to class imbalance even when ``average == 'macro'``,\n because class imbalance affects the composition of each of the\n 'rest' groupings.\n``'ovo'``:\n Stands for One-vs-one. Computes the average AUC of all\n possible pairwise combinations of classes [5]_.\n Insensitive to class imbalance when\n ``average == 'macro'``." }, "type": { "kind": "EnumType", @@ -175527,7 +175527,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "Whether to drop some suboptimal thresholds which would not appear\non a plotted ROC curve. This is useful in order to create lighter\nROC curves.\n\n.. versionadded:: 0.17\nparameter *drop_intermediate*." + "description": "Whether to drop some suboptimal thresholds which would not appear\non a plotted ROC curve. This is useful in order to create lighter\nROC curves.\n\n.. versionadded:: 0.17\n parameter *drop_intermediate*." }, "type": { "kind": "NamedType", @@ -175978,7 +175978,7 @@ "docstring": { "type": "{'raw_values', 'uniform_average'} or array-like of shape (n_outputs,)", "default_value": "'uniform_average'", - "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average scores.\n\n'raw_values' :\nReturns a full set of errors in case of multioutput input.\n\n'uniform_average' :\nScores of all outputs are averaged with uniform weight." + "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average scores.\n\n'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n'uniform_average' :\n Scores of all outputs are averaged with uniform weight." }, "type": { "kind": "UnionType", @@ -176085,7 +176085,7 @@ "docstring": { "type": "{'raw_values', 'uniform_average'} or array-like of shape (n_outputs,)", "default_value": "'uniform_average'", - "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average scores.\n\n'raw_values' :\nReturns a full set of errors in case of multioutput input.\n\n'uniform_average' :\nScores of all outputs are averaged with uniform weight." + "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average scores.\n\n'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n'uniform_average' :\n Scores of all outputs are averaged with uniform weight." }, "type": { "kind": "UnionType", @@ -176175,7 +176175,7 @@ "docstring": { "type": "float", "default_value": "0", - "description": "Tweedie power parameter. 
Either power <= 0 or power >= 1.\n\nThe higher `p` the less weight is given to extreme\ndeviations between true and predicted targets.\n\n- power < 0: Extreme stable distribution. Requires: y_pred > 0.\n- power = 0 : Normal distribution, output corresponds to r2_score.\ny_true and y_pred can be any real numbers.\n- power = 1 : Poisson distribution. Requires: y_true >= 0 and\ny_pred > 0.\n- 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0\nand y_pred > 0.\n- power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.\n- power = 3 : Inverse Gaussian distribution. Requires: y_true > 0\nand y_pred > 0.\n- otherwise : Positive stable distribution. Requires: y_true > 0\nand y_pred > 0." + "description": "Tweedie power parameter. Either power <= 0 or power >= 1.\n\nThe higher `p` the less weight is given to extreme\ndeviations between true and predicted targets.\n\n- power < 0: Extreme stable distribution. Requires: y_pred > 0.\n- power = 0 : Normal distribution, output corresponds to r2_score.\n y_true and y_pred can be any real numbers.\n- power = 1 : Poisson distribution. Requires: y_true >= 0 and\n y_pred > 0.\n- 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0\n and y_pred > 0.\n- power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.\n- power = 3 : Inverse Gaussian distribution. Requires: y_true > 0\n and y_pred > 0.\n- otherwise : Positive stable distribution. Requires: y_true > 0\n and y_pred > 0." }, "type": { "kind": "NamedType", @@ -176256,14 +176256,14 @@ "docstring": { "type": "{'raw_values', 'uniform_average', 'variance_weighted'} or array-like of shape (n_outputs,)", "default_value": "'uniform_average'", - "description": "Defines aggregating of multiple output scores.\nArray-like value defines weights used to average scores.\n\n'raw_values' :\nReturns a full set of scores in case of multioutput input.\n\n'uniform_average' :\nScores of all outputs are averaged with uniform weight.\n\n'variance_weighted' :\nScores of all outputs are averaged, weighted by the variances\nof each individual output." + "description": "Defines aggregating of multiple output scores.\nArray-like value defines weights used to average scores.\n\n'raw_values' :\n Returns a full set of scores in case of multioutput input.\n\n'uniform_average' :\n Scores of all outputs are averaged with uniform weight.\n\n'variance_weighted' :\n Scores of all outputs are averaged, weighted by the variances\n of each individual output." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["uniform_average", "raw_values", "variance_weighted"] + "values": ["variance_weighted", "uniform_average", "raw_values"] }, { "kind": "NamedType", @@ -176293,7 +176293,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Explained variance regression score function.\n\nBest possible score is 1.0, lower values are worse.\n\nIn the particular case when ``y_true`` is constant, the explained variance\nscore is not finite: it is either ``NaN`` (perfect predictions) or\n``-Inf`` (imperfect predictions). To prevent such non-finite numbers to\npollute higher-level experiments such as a grid search cross-validation,\nby default these cases are replaced with 1.0 (perfect predictions) or 0.0\n(imperfect predictions) respectively. If ``force_finite``\nis set to ``False``, this score falls back on the original :math:`R^2`\ndefinition.\n\n.. 
note::\nThe Explained Variance score is similar to the\n:func:`R^2 score `, with the notable difference that it\ndoes not account for systematic offsets in the prediction. Most often\nthe :func:`R^2 score ` should be preferred.\n\nRead more in the :ref:`User Guide `.", + "description": "Explained variance regression score function.\n\nBest possible score is 1.0, lower values are worse.\n\nIn the particular case when ``y_true`` is constant, the explained variance\nscore is not finite: it is either ``NaN`` (perfect predictions) or\n``-Inf`` (imperfect predictions). To prevent such non-finite numbers to\npollute higher-level experiments such as a grid search cross-validation,\nby default these cases are replaced with 1.0 (perfect predictions) or 0.0\n(imperfect predictions) respectively. If ``force_finite``\nis set to ``False``, this score falls back on the original :math:`R^2`\ndefinition.\n\n.. note::\n The Explained Variance score is similar to the\n :func:`R^2 score `, with the notable difference that it\n does not account for systematic offsets in the prediction. Most often\n the :func:`R^2 score ` should be preferred.\n\nRead more in the :ref:`User Guide `.", "docstring": "Explained variance regression score function.\n\nBest possible score is 1.0, lower values are worse.\n\nIn the particular case when ``y_true`` is constant, the explained variance\nscore is not finite: it is either ``NaN`` (perfect predictions) or\n``-Inf`` (imperfect predictions). To prevent such non-finite numbers to\npollute higher-level experiments such as a grid search cross-validation,\nby default these cases are replaced with 1.0 (perfect predictions) or 0.0\n(imperfect predictions) respectively. If ``force_finite``\nis set to ``False``, this score falls back on the original :math:`R^2`\ndefinition.\n\n.. note::\n The Explained Variance score is similar to the\n :func:`R^2 score `, with the notable difference that it\n does not account for systematic offsets in the prediction. Most often\n the :func:`R^2 score ` should be preferred.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ny_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Ground truth (correct) target values.\n\ny_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n Estimated target values.\n\nsample_weight : array-like of shape (n_samples,), default=None\n Sample weights.\n\nmultioutput : {'raw_values', 'uniform_average', 'variance_weighted'} or array-like of shape (n_outputs,), default='uniform_average'\n Defines aggregating of multiple output scores.\n Array-like value defines weights used to average scores.\n\n 'raw_values' :\n Returns a full set of scores in case of multioutput input.\n\n 'uniform_average' :\n Scores of all outputs are averaged with uniform weight.\n\n 'variance_weighted' :\n Scores of all outputs are averaged, weighted by the variances\n of each individual output.\n\nforce_finite : bool, default=True\n Flag indicating if ``NaN`` and ``-Inf`` scores resulting from constant\n data should be replaced with real numbers (``1.0`` if prediction is\n perfect, ``0.0`` otherwise). Default is ``True``, a convenient setting\n for hyperparameters' search procedures (e.g. grid search\n cross-validation).\n\n .. 
versionadded:: 1.1\n\nReturns\n-------\nscore : float or ndarray of floats\n The explained variance or ndarray if 'multioutput' is 'raw_values'.\n\nSee Also\n--------\nr2_score :\n Similar metric, but accounting for systematic offsets in\n prediction.\n\nNotes\n-----\nThis is not a symmetric function.\n\nExamples\n--------\n>>> from sklearn.metrics import explained_variance_score\n>>> y_true = [3, -0.5, 2, 7]\n>>> y_pred = [2.5, 0.0, 2, 8]\n>>> explained_variance_score(y_true, y_pred)\n0.957...\n>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n>>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n>>> explained_variance_score(y_true, y_pred, multioutput='uniform_average')\n0.983...\n>>> y_true = [-2, -2, -2]\n>>> y_pred = [-2, -2, -2]\n>>> explained_variance_score(y_true, y_pred)\n1.0\n>>> explained_variance_score(y_true, y_pred, force_finite=False)\nnan\n>>> y_true = [-2, -2, -2]\n>>> y_pred = [-2, -2, -2 + 1e-8]\n>>> explained_variance_score(y_true, y_pred)\n0.0\n>>> explained_variance_score(y_true, y_pred, force_finite=False)\n-inf" }, { @@ -176410,7 +176410,7 @@ "docstring": { "type": "{'raw_values', 'uniform_average'} or array-like of shape (n_outputs,)", "default_value": "'uniform_average'", - "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average errors.\n\n'raw_values' :\nReturns a full set of errors in case of multioutput input.\n\n'uniform_average' :\nErrors of all outputs are averaged with uniform weight." + "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average errors.\n\n'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n'uniform_average' :\n Errors of all outputs are averaged with uniform weight." }, "type": { "kind": "UnionType", @@ -176500,7 +176500,7 @@ "docstring": { "type": "{'raw_values', 'uniform_average'} or array-like", "default_value": "", - "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average errors.\nIf input is list then the shape must be (n_outputs,).\n\n'raw_values' :\nReturns a full set of errors in case of multioutput input.\n\n'uniform_average' :\nErrors of all outputs are averaged with uniform weight." + "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average errors.\nIf input is list then the shape must be (n_outputs,).\n\n'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n'uniform_average' :\n Errors of all outputs are averaged with uniform weight." }, "type": { "kind": "UnionType", @@ -176668,7 +176668,7 @@ "docstring": { "type": "{'raw_values', 'uniform_average'} or array-like of shape (n_outputs,)", "default_value": "'uniform_average'", - "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average errors.\n\n'raw_values' :\nReturns a full set of errors in case of multioutput input.\n\n'uniform_average' :\nErrors of all outputs are averaged with uniform weight." + "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average errors.\n\n'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n'uniform_average' :\n Errors of all outputs are averaged with uniform weight." 
}, "type": { "kind": "UnionType", @@ -176822,7 +176822,7 @@ "docstring": { "type": "{'raw_values', 'uniform_average'} or array-like of shape (n_outputs,)", "default_value": "'uniform_average'", - "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average errors.\n\n'raw_values' :\nReturns a full set of errors in case of multioutput input.\n\n'uniform_average' :\nErrors of all outputs are averaged with uniform weight." + "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average errors.\n\n'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n'uniform_average' :\n Errors of all outputs are averaged with uniform weight." }, "type": { "kind": "UnionType", @@ -176929,7 +176929,7 @@ "docstring": { "type": "{'raw_values', 'uniform_average'} or array-like of shape (n_outputs,)", "default_value": "'uniform_average'", - "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average errors.\n\n'raw_values' :\nReturns a full set of errors when the input is of multioutput\nformat.\n\n'uniform_average' :\nErrors of all outputs are averaged with uniform weight." + "description": "Defines aggregating of multiple output values.\nArray-like value defines weights used to average errors.\n\n'raw_values' :\n Returns a full set of errors when the input is of multioutput\n format.\n\n'uniform_average' :\n Errors of all outputs are averaged with uniform weight." }, "type": { "kind": "UnionType", @@ -177036,7 +177036,7 @@ "docstring": { "type": "float", "default_value": "0", - "description": "Tweedie power parameter. Either power <= 0 or power >= 1.\n\nThe higher `p` the less weight is given to extreme\ndeviations between true and predicted targets.\n\n- power < 0: Extreme stable distribution. Requires: y_pred > 0.\n- power = 0 : Normal distribution, output corresponds to\nmean_squared_error. y_true and y_pred can be any real numbers.\n- power = 1 : Poisson distribution. Requires: y_true >= 0 and\ny_pred > 0.\n- 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0\nand y_pred > 0.\n- power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.\n- power = 3 : Inverse Gaussian distribution. Requires: y_true > 0\nand y_pred > 0.\n- otherwise : Positive stable distribution. Requires: y_true > 0\nand y_pred > 0." + "description": "Tweedie power parameter. Either power <= 0 or power >= 1.\n\nThe higher `p` the less weight is given to extreme\ndeviations between true and predicted targets.\n\n- power < 0: Extreme stable distribution. Requires: y_pred > 0.\n- power = 0 : Normal distribution, output corresponds to\n mean_squared_error. y_true and y_pred can be any real numbers.\n- power = 1 : Poisson distribution. Requires: y_true >= 0 and\n y_pred > 0.\n- 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0\n and y_pred > 0.\n- power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.\n- power = 3 : Inverse Gaussian distribution. Requires: y_true > 0\n and y_pred > 0.\n- otherwise : Positive stable distribution. Requires: y_true > 0\n and y_pred > 0." }, "type": { "kind": "NamedType", @@ -177100,7 +177100,7 @@ "docstring": { "type": "{'raw_values', 'uniform_average'} or array-like of shape (n_outputs,)", "default_value": "'uniform_average'", - "description": "Defines aggregating of multiple output values. 
Array-like value defines\nweights used to average errors.\n\n'raw_values' :\nReturns a full set of errors in case of multioutput input.\n\n'uniform_average' :\nErrors of all outputs are averaged with uniform weight." + "description": "Defines aggregating of multiple output values. Array-like value defines\nweights used to average errors.\n\n'raw_values' :\n Returns a full set of errors in case of multioutput input.\n\n'uniform_average' :\n Errors of all outputs are averaged with uniform weight." }, "type": { "kind": "UnionType", @@ -177207,14 +177207,14 @@ "docstring": { "type": "{'raw_values', 'uniform_average', 'variance_weighted'}, array-like of shape (n_outputs,) or None", "default_value": "'uniform_average'", - "description": "Defines aggregating of multiple output scores.\nArray-like value defines weights used to average scores.\nDefault is \"uniform_average\".\n\n'raw_values' :\nReturns a full set of scores in case of multioutput input.\n\n'uniform_average' :\nScores of all outputs are averaged with uniform weight.\n\n'variance_weighted' :\nScores of all outputs are averaged, weighted by the variances\nof each individual output.\n\n.. versionchanged:: 0.19\nDefault value of multioutput is 'uniform_average'." + "description": "Defines aggregating of multiple output scores.\nArray-like value defines weights used to average scores.\nDefault is \"uniform_average\".\n\n'raw_values' :\n Returns a full set of scores in case of multioutput input.\n\n'uniform_average' :\n Scores of all outputs are averaged with uniform weight.\n\n'variance_weighted' :\n Scores of all outputs are averaged, weighted by the variances\n of each individual output.\n\n.. versionchanged:: 0.19\n Default value of multioutput is 'uniform_average'." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["uniform_average", "raw_values", "variance_weighted"] + "values": ["variance_weighted", "uniform_average", "raw_values"] }, { "kind": "NamedType", @@ -177760,7 +177760,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Return True if using a cache is beneficial.\n\nCaching may be beneficial when one of these conditions holds:\n- `_ProbaScorer` will be called twice.\n- `_PredictScorer` will be called twice.\n- `_ThresholdScorer` will be called twice.\n- `_ThresholdScorer` and `_PredictScorer` are called and\nestimator is a regressor.\n- `_ThresholdScorer` and `_ProbaScorer` are called and\nestimator does not have a `decision_function` attribute.", + "description": "Return True if using a cache is beneficial.\n\nCaching may be beneficial when one of these conditions holds:\n - `_ProbaScorer` will be called twice.\n - `_PredictScorer` will be called twice.\n - `_ThresholdScorer` will be called twice.\n - `_ThresholdScorer` and `_PredictScorer` are called and\n estimator is a regressor.\n - `_ThresholdScorer` and `_ProbaScorer` are called and\n estimator does not have a `decision_function` attribute.", "docstring": "Return True if using a cache is beneficial.\n\nCaching may be beneficial when one of these conditions holds:\n - `_ProbaScorer` will be called twice.\n - `_PredictScorer` will be called twice.\n - `_ThresholdScorer` will be called twice.\n - `_ThresholdScorer` and `_PredictScorer` are called and\n estimator is a regressor.\n - `_ThresholdScorer` and `_ProbaScorer` are called and\n estimator does not have a `decision_function` attribute." 
}, { @@ -178269,7 +178269,7 @@ "docstring": { "type": "list, tuple or dict", "default_value": "", - "description": "Strategy to evaluate the performance of the cross-validated model on\nthe test set.\n\nThe possibilities are:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where they keys are the metric\nnames and the values are the metric scores;\n- a dictionary with metric names as keys and callables a values.\n\nSee :ref:`multimetric_grid_search` for an example." + "description": "Strategy to evaluate the performance of the cross-validated model on\nthe test set.\n\nThe possibilities are:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where they keys are the metric\n names and the values are the metric scores;\n- a dictionary with metric names as keys and callables a values.\n\nSee :ref:`multimetric_grid_search` for an example." }, "type": { "kind": "UnionType", @@ -178925,7 +178925,7 @@ "docstring": { "type": "str", "default_value": "'arithmetic'", - "description": "How to compute the normalizer in the denominator. Possible options\nare 'min', 'geometric', 'arithmetic', and 'max'.\n\n.. versionadded:: 0.20\n\n.. versionchanged:: 0.22\nThe default value of ``average_method`` changed from 'max' to\n'arithmetic'." + "description": "How to compute the normalizer in the denominator. Possible options\nare 'min', 'geometric', 'arithmetic', and 'max'.\n\n.. versionadded:: 0.20\n\n.. versionchanged:: 0.22\n The default value of ``average_method`` changed from 'max' to\n 'arithmetic'." }, "type": { "kind": "NamedType", @@ -178936,7 +178936,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics", "sklearn/sklearn.metrics.cluster"], - "description": "Adjusted Mutual Information between two clusterings.\n\nAdjusted Mutual Information (AMI) is an adjustment of the Mutual\nInformation (MI) score to account for chance. It accounts for the fact that\nthe MI is generally higher for two clusterings with a larger number of\nclusters, regardless of whether there is actually more information shared.\nFor two clusterings :math:`U` and :math:`V`, the AMI is given as::\n\nAMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching :math:`U` (``label_true``)\nwith :math:`V` (``labels_pred``) will return the same score value. This can\nbe useful to measure the agreement of two independent label assignments\nstrategies on the same dataset when the real ground truth is not known.\n\nBe mindful that this function is an order of magnitude slower than other\nmetrics, such as the Adjusted Rand Index.\n\nRead more in the :ref:`User Guide `.", + "description": "Adjusted Mutual Information between two clusterings.\n\nAdjusted Mutual Information (AMI) is an adjustment of the Mutual\nInformation (MI) score to account for chance. 
It accounts for the fact that\nthe MI is generally higher for two clusterings with a larger number of\nclusters, regardless of whether there is actually more information shared.\nFor two clusterings :math:`U` and :math:`V`, the AMI is given as::\n\n AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching :math:`U` (``label_true``)\nwith :math:`V` (``labels_pred``) will return the same score value. This can\nbe useful to measure the agreement of two independent label assignments\nstrategies on the same dataset when the real ground truth is not known.\n\nBe mindful that this function is an order of magnitude slower than other\nmetrics, such as the Adjusted Rand Index.\n\nRead more in the :ref:`User Guide `.", "docstring": "Adjusted Mutual Information between two clusterings.\n\nAdjusted Mutual Information (AMI) is an adjustment of the Mutual\nInformation (MI) score to account for chance. It accounts for the fact that\nthe MI is generally higher for two clusterings with a larger number of\nclusters, regardless of whether there is actually more information shared.\nFor two clusterings :math:`U` and :math:`V`, the AMI is given as::\n\n AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching :math:`U` (``label_true``)\nwith :math:`V` (``labels_pred``) will return the same score value. This can\nbe useful to measure the agreement of two independent label assignments\nstrategies on the same dataset when the real ground truth is not known.\n\nBe mindful that this function is an order of magnitude slower than other\nmetrics, such as the Adjusted Rand Index.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n A clustering of the data into disjoint subsets, called :math:`U` in\n the above formula.\n\nlabels_pred : int array-like of shape (n_samples,)\n A clustering of the data into disjoint subsets, called :math:`V` in\n the above formula.\n\naverage_method : str, default='arithmetic'\n How to compute the normalizer in the denominator. Possible options\n are 'min', 'geometric', 'arithmetic', and 'max'.\n\n .. versionadded:: 0.20\n\n .. versionchanged:: 0.22\n The default value of ``average_method`` changed from 'max' to\n 'arithmetic'.\n\nReturns\n-------\nami: float (upperlimited by 1.0)\n The AMI returns a value of 1 when the two partitions are identical\n (ie perfectly matched). Random partitions (independent labellings) have\n an expected AMI around 0 on average hence can be negative. The value is\n in adjusted nats (based on the natural logarithm).\n\nSee Also\n--------\nadjusted_rand_score : Adjusted Rand Index.\nmutual_info_score : Mutual Information (not adjusted for chance).\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n >>> from sklearn.metrics.cluster import adjusted_mutual_info_score\n >>> adjusted_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])\n ... # doctest: +SKIP\n 1.0\n >>> adjusted_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])\n ... 
# doctest: +SKIP\n 1.0\n\nIf classes members are completely split across different clusters,\nthe assignment is totally in-complete, hence the AMI is null::\n\n >>> adjusted_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])\n ... # doctest: +SKIP\n 0.0\n\nReferences\n----------\n.. [1] `Vinh, Epps, and Bailey, (2010). Information Theoretic Measures for\n Clusterings Comparison: Variants, Properties, Normalization and\n Correction for Chance, JMLR\n `_\n\n.. [2] `Wikipedia entry for the Adjusted Mutual Information\n `_" }, { @@ -178992,7 +178992,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics", "sklearn/sklearn.metrics.cluster"], - "description": "Rand index adjusted for chance.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is then \"adjusted for chance\" into the ARI score\nusing the following scheme::\n\nARI = (RI - Expected_RI) / (max(RI) - Expected_RI)\n\nThe adjusted Rand index is thus ensured to have a value close to\n0.0 for random labeling independently of the number of clusters and\nsamples and exactly 1.0 when the clusterings are identical (up to\na permutation).\n\nARI is a symmetric measure::\n\nadjusted_rand_score(a, b) == adjusted_rand_score(b, a)\n\nRead more in the :ref:`User Guide `.", + "description": "Rand index adjusted for chance.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is then \"adjusted for chance\" into the ARI score\nusing the following scheme::\n\n ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)\n\nThe adjusted Rand index is thus ensured to have a value close to\n0.0 for random labeling independently of the number of clusters and\nsamples and exactly 1.0 when the clusterings are identical (up to\na permutation).\n\nARI is a symmetric measure::\n\n adjusted_rand_score(a, b) == adjusted_rand_score(b, a)\n\nRead more in the :ref:`User Guide `.", "docstring": "Rand index adjusted for chance.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is then \"adjusted for chance\" into the ARI score\nusing the following scheme::\n\n ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)\n\nThe adjusted Rand index is thus ensured to have a value close to\n0.0 for random labeling independently of the number of clusters and\nsamples and exactly 1.0 when the clusterings are identical (up to\na permutation).\n\nARI is a symmetric measure::\n\n adjusted_rand_score(a, b) == adjusted_rand_score(b, a)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n Ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n Cluster labels to evaluate\n\nReturns\n-------\nARI : float\n Similarity score between -1.0 and 1.0. Random labelings have an ARI\n close to 0.0. 
1.0 stands for perfect match.\n\nExamples\n--------\nPerfectly matching labelings have a score of 1 even\n\n >>> from sklearn.metrics.cluster import adjusted_rand_score\n >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nLabelings that assign all classes members to the same clusters\nare complete but may not always be pure, hence penalized::\n\n >>> adjusted_rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n 0.57...\n\nARI is symmetric, so labelings that have pure clusters with members\ncoming from the same classes but unnecessary splits are penalized::\n\n >>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 2])\n 0.57...\n\nIf classes members are completely split across different clusters, the\nassignment is totally incomplete, hence the ARI is very low::\n\n >>> adjusted_rand_score([0, 0, 0, 0], [0, 1, 2, 3])\n 0.0\n\nReferences\n----------\n.. [Hubert1985] L. Hubert and P. Arabie, Comparing Partitions,\n Journal of Classification 1985\n https://link.springer.com/article/10.1007%2FBF01908075\n\n.. [Steinley2004] D. Steinley, Properties of the Hubert-Arabie\n adjusted Rand index, Psychological Methods 2004\n\n.. [wk] https://en.wikipedia.org/wiki/Rand_index#Adjusted_Rand_index\n\nSee Also\n--------\nadjusted_mutual_info_score : Adjusted Mutual Information." }, { @@ -179323,7 +179323,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics", "sklearn/sklearn.metrics.cluster"], - "description": "Measure the similarity of two clusterings of a set of points.\n\n.. versionadded:: 0.18\n\nThe Fowlkes-Mallows index (FMI) is defined as the geometric mean between of\nthe precision and recall::\n\nFMI = TP / sqrt((TP + FP) * (TP + FN))\n\nWhere ``TP`` is the number of **True Positive** (i.e. the number of pair of\npoints that belongs in the same clusters in both ``labels_true`` and\n``labels_pred``), ``FP`` is the number of **False Positive** (i.e. the\nnumber of pair of points that belongs in the same clusters in\n``labels_true`` and not in ``labels_pred``) and ``FN`` is the number of\n**False Negative** (i.e the number of pair of points that belongs in the\nsame clusters in ``labels_pred`` and not in ``labels_True``).\n\nThe score ranges from 0 to 1. A high value indicates a good similarity\nbetween two clusters.\n\nRead more in the :ref:`User Guide `.", + "description": "Measure the similarity of two clusterings of a set of points.\n\n.. versionadded:: 0.18\n\nThe Fowlkes-Mallows index (FMI) is defined as the geometric mean between of\nthe precision and recall::\n\n FMI = TP / sqrt((TP + FP) * (TP + FN))\n\nWhere ``TP`` is the number of **True Positive** (i.e. the number of pair of\npoints that belongs in the same clusters in both ``labels_true`` and\n``labels_pred``), ``FP`` is the number of **False Positive** (i.e. the\nnumber of pair of points that belongs in the same clusters in\n``labels_true`` and not in ``labels_pred``) and ``FN`` is the number of\n**False Negative** (i.e the number of pair of points that belongs in the\nsame clusters in ``labels_pred`` and not in ``labels_True``).\n\nThe score ranges from 0 to 1. A high value indicates a good similarity\nbetween two clusters.\n\nRead more in the :ref:`User Guide `.", "docstring": "Measure the similarity of two clusterings of a set of points.\n\n.. 
versionadded:: 0.18\n\nThe Fowlkes-Mallows index (FMI) is defined as the geometric mean between of\nthe precision and recall::\n\n FMI = TP / sqrt((TP + FP) * (TP + FN))\n\nWhere ``TP`` is the number of **True Positive** (i.e. the number of pair of\npoints that belongs in the same clusters in both ``labels_true`` and\n``labels_pred``), ``FP`` is the number of **False Positive** (i.e. the\nnumber of pair of points that belongs in the same clusters in\n``labels_true`` and not in ``labels_pred``) and ``FN`` is the number of\n**False Negative** (i.e the number of pair of points that belongs in the\nsame clusters in ``labels_pred`` and not in ``labels_True``).\n\nThe score ranges from 0 to 1. A high value indicates a good similarity\nbetween two clusters.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = (``n_samples``,)\n A clustering of the data into disjoint subsets.\n\nlabels_pred : array, shape = (``n_samples``, )\n A clustering of the data into disjoint subsets.\n\nsparse : bool, default=False\n Compute contingency matrix internally with sparse matrix.\n\nReturns\n-------\nscore : float\n The resulting Fowlkes-Mallows score.\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have\nscore 1.0::\n\n >>> from sklearn.metrics.cluster import fowlkes_mallows_score\n >>> fowlkes_mallows_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> fowlkes_mallows_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nIf classes members are completely split across different clusters,\nthe assignment is totally random, hence the FMI is null::\n\n >>> fowlkes_mallows_score([0, 0, 0, 0], [0, 1, 2, 3])\n 0.0\n\nReferences\n----------\n.. [1] `E. B. Fowkles and C. L. Mallows, 1983. \"A method for comparing two\n hierarchical clusterings\". Journal of the American Statistical\n Association\n `_\n\n.. [2] `Wikipedia entry for the Fowlkes-Mallows Index\n `_" }, { @@ -179534,7 +179534,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics", "sklearn/sklearn.metrics.cluster"], - "description": "Mutual Information between two clusterings.\n\nThe Mutual Information is a measure of the similarity between two labels\nof the same data. Where :math:`|U_i|` is the number of the samples\nin cluster :math:`U_i` and :math:`|V_j|` is the number of the\nsamples in cluster :math:`V_j`, the Mutual Information\nbetween clusterings :math:`U` and :math:`V` is given as:\n\n.. math::\n\nMI(U,V)=\\sum_{i=1}^{|U|} \\sum_{j=1}^{|V|} \\frac{|U_i\\cap V_j|}{N}\n\\log\\frac{N|U_i \\cap V_j|}{|U_i||V_j|}\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching :math:`U` (i.e\n``label_true``) with :math:`V` (i.e. ``label_pred``) will return the\nsame score value. This can be useful to measure the agreement of two\nindependent label assignments strategies on the same dataset when the\nreal ground truth is not known.\n\nRead more in the :ref:`User Guide `.", + "description": "Mutual Information between two clusterings.\n\nThe Mutual Information is a measure of the similarity between two labels\nof the same data. Where :math:`|U_i|` is the number of the samples\nin cluster :math:`U_i` and :math:`|V_j|` is the number of the\nsamples in cluster :math:`V_j`, the Mutual Information\nbetween clusterings :math:`U` and :math:`V` is given as:\n\n.. 
math::\n\n MI(U,V)=\\sum_{i=1}^{|U|} \\sum_{j=1}^{|V|} \\frac{|U_i\\cap V_j|}{N}\n \\log\\frac{N|U_i \\cap V_j|}{|U_i||V_j|}\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching :math:`U` (i.e\n``label_true``) with :math:`V` (i.e. ``label_pred``) will return the\nsame score value. This can be useful to measure the agreement of two\nindependent label assignments strategies on the same dataset when the\nreal ground truth is not known.\n\nRead more in the :ref:`User Guide `.", "docstring": "Mutual Information between two clusterings.\n\nThe Mutual Information is a measure of the similarity between two labels\nof the same data. Where :math:`|U_i|` is the number of the samples\nin cluster :math:`U_i` and :math:`|V_j|` is the number of the\nsamples in cluster :math:`V_j`, the Mutual Information\nbetween clusterings :math:`U` and :math:`V` is given as:\n\n.. math::\n\n MI(U,V)=\\sum_{i=1}^{|U|} \\sum_{j=1}^{|V|} \\frac{|U_i\\cap V_j|}{N}\n \\log\\frac{N|U_i \\cap V_j|}{|U_i||V_j|}\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching :math:`U` (i.e\n``label_true``) with :math:`V` (i.e. ``label_pred``) will return the\nsame score value. This can be useful to measure the agreement of two\nindependent label assignments strategies on the same dataset when the\nreal ground truth is not known.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n A clustering of the data into disjoint subsets, called :math:`U` in\n the above formula.\n\nlabels_pred : int array-like of shape (n_samples,)\n A clustering of the data into disjoint subsets, called :math:`V` in\n the above formula.\n\ncontingency : {ndarray, sparse matrix} of shape (n_classes_true, n_classes_pred), default=None\n A contingency matrix given by the :func:`contingency_matrix` function.\n If value is ``None``, it will be computed, otherwise the given value is\n used, with ``labels_true`` and ``labels_pred`` ignored.\n\nReturns\n-------\nmi : float\n Mutual information, a non-negative value, measured in nats using the\n natural logarithm.\n\nNotes\n-----\nThe logarithm used is the natural logarithm (base-e).\n\nSee Also\n--------\nadjusted_mutual_info_score : Adjusted against chance Mutual Information.\nnormalized_mutual_info_score : Normalized Mutual Information." }, { @@ -179596,7 +179596,7 @@ "docstring": { "type": "str", "default_value": "'arithmetic'", - "description": "How to compute the normalizer in the denominator. Possible options\nare 'min', 'geometric', 'arithmetic', and 'max'.\n\n.. versionadded:: 0.20\n\n.. versionchanged:: 0.22\nThe default value of ``average_method`` changed from 'geometric' to\n'arithmetic'." + "description": "How to compute the normalizer in the denominator. Possible options\nare 'min', 'geometric', 'arithmetic', and 'max'.\n\n.. versionadded:: 0.20\n\n.. versionchanged:: 0.22\n The default value of ``average_method`` changed from 'geometric' to\n 'arithmetic'." 
}, "type": { "kind": "NamedType", @@ -179737,7 +179737,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics", "sklearn/sklearn.metrics.cluster"], - "description": "Rand index.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is:\n\nRI = (number of agreeing pairs) / (number of pairs)\n\nRead more in the :ref:`User Guide `.", + "description": "Rand index.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is:\n\n RI = (number of agreeing pairs) / (number of pairs)\n\nRead more in the :ref:`User Guide `.", "docstring": "Rand index.\n\nThe Rand Index computes a similarity measure between two clusterings\nby considering all pairs of samples and counting pairs that are\nassigned in the same or different clusters in the predicted and\ntrue clusterings.\n\nThe raw RI score is:\n\n RI = (number of agreeing pairs) / (number of pairs)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : array-like of shape (n_samples,), dtype=integral\n Ground truth class labels to be used as a reference.\n\nlabels_pred : array-like of shape (n_samples,), dtype=integral\n Cluster labels to evaluate.\n\nReturns\n-------\nRI : float\n Similarity score between 0.0 and 1.0, inclusive, 1.0 stands for\n perfect match.\n\nSee Also\n--------\nadjusted_rand_score: Adjusted Rand Score\nadjusted_mutual_info_score: Adjusted Mutual Information\n\nExamples\n--------\nPerfectly matching labelings have a score of 1 even\n\n >>> from sklearn.metrics.cluster import rand_score\n >>> rand_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nLabelings that assign all classes members to the same clusters\nare complete but may not always be pure, hence penalized:\n\n >>> rand_score([0, 0, 1, 2], [0, 0, 1, 1])\n 0.83...\n\nReferences\n----------\n.. L. Hubert and P. Arabie, Comparing Partitions, Journal of\n Classification 1985\n https://link.springer.com/article/10.1007%2FBF01908075\n\n.. https://en.wikipedia.org/wiki/Simple_matching_coefficient\n\n.. https://en.wikipedia.org/wiki/Rand_index" }, { @@ -179810,7 +179810,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics", "sklearn/sklearn.metrics.cluster"], - "description": "V-measure cluster labeling given a ground truth.\n\nThis score is identical to :func:`normalized_mutual_info_score` with\nthe ``'arithmetic'`` option for averaging.\n\nThe V-measure is the harmonic mean between homogeneity and completeness::\n\nv = (1 + beta) * homogeneity * completeness\n/ (beta * homogeneity + completeness)\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. 
This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nRead more in the :ref:`User Guide `.", + "description": "V-measure cluster labeling given a ground truth.\n\nThis score is identical to :func:`normalized_mutual_info_score` with\nthe ``'arithmetic'`` option for averaging.\n\nThe V-measure is the harmonic mean between homogeneity and completeness::\n\n v = (1 + beta) * homogeneity * completeness\n / (beta * homogeneity + completeness)\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\nRead more in the :ref:`User Guide `.", "docstring": "V-measure cluster labeling given a ground truth.\n\nThis score is identical to :func:`normalized_mutual_info_score` with\nthe ``'arithmetic'`` option for averaging.\n\nThe V-measure is the harmonic mean between homogeneity and completeness::\n\n v = (1 + beta) * homogeneity * completeness\n / (beta * homogeneity + completeness)\n\nThis metric is independent of the absolute values of the labels:\na permutation of the class or cluster label values won't change the\nscore value in any way.\n\nThis metric is furthermore symmetric: switching ``label_true`` with\n``label_pred`` will return the same score value. This can be useful to\nmeasure the agreement of two independent label assignments strategies\non the same dataset when the real ground truth is not known.\n\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nlabels_true : int array, shape = [n_samples]\n ground truth class labels to be used as a reference\n\nlabels_pred : array-like of shape (n_samples,)\n cluster labels to evaluate\n\nbeta : float, default=1.0\n Ratio of weight attributed to ``homogeneity`` vs ``completeness``.\n If ``beta`` is greater than 1, ``completeness`` is weighted more\n strongly in the calculation. If ``beta`` is less than 1,\n ``homogeneity`` is weighted more strongly.\n\nReturns\n-------\nv_measure : float\n score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling\n\nReferences\n----------\n\n.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. 
V-Measure: A\n conditional entropy-based external cluster evaluation measure\n `_\n\nSee Also\n--------\nhomogeneity_score\ncompleteness_score\nnormalized_mutual_info_score\n\nExamples\n--------\n\nPerfect labelings are both homogeneous and complete, hence have score 1.0::\n\n >>> from sklearn.metrics.cluster import v_measure_score\n >>> v_measure_score([0, 0, 1, 1], [0, 0, 1, 1])\n 1.0\n >>> v_measure_score([0, 0, 1, 1], [1, 1, 0, 0])\n 1.0\n\nLabelings that assign all classes members to the same clusters\nare complete be not homogeneous, hence penalized::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 2], [0, 0, 1, 1]))\n 0.8...\n >>> print(\"%.6f\" % v_measure_score([0, 1, 2, 3], [0, 0, 1, 1]))\n 0.66...\n\nLabelings that have pure clusters with members coming from the same\nclasses are homogeneous but un-necessary splits harms completeness\nand thus penalize V-measure as well::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 1, 2]))\n 0.8...\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 1, 2, 3]))\n 0.66...\n\nIf classes members are completely split across different clusters,\nthe assignment is totally incomplete, hence the V-Measure is null::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 0, 0], [0, 1, 2, 3]))\n 0.0...\n\nClusters that include samples from totally different classes totally\ndestroy the homogeneity of the labeling, hence::\n\n >>> print(\"%.6f\" % v_measure_score([0, 0, 1, 1], [0, 0, 0, 0]))\n 0.0..." }, { @@ -181013,7 +181013,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Computes the additive chi-squared kernel between observations in X and\nY.\n\nThe chi-squared kernel is computed between each pair of rows in X and Y. X\nand Y have to be non-negative. This kernel is most commonly applied to\nhistograms.\n\nThe chi-squared kernel is given by::\n\nk(x, y) = -Sum [(x - y)^2 / (x + y)]\n\nIt can be interpreted as a weighted difference per entry.\n\nRead more in the :ref:`User Guide `.", + "description": "Computes the additive chi-squared kernel between observations in X and\nY.\n\nThe chi-squared kernel is computed between each pair of rows in X and Y. X\nand Y have to be non-negative. This kernel is most commonly applied to\nhistograms.\n\nThe chi-squared kernel is given by::\n\n k(x, y) = -Sum [(x - y)^2 / (x + y)]\n\nIt can be interpreted as a weighted difference per entry.\n\nRead more in the :ref:`User Guide `.", "docstring": "Computes the additive chi-squared kernel between observations in X and\nY.\n\nThe chi-squared kernel is computed between each pair of rows in X and Y. X\nand Y have to be non-negative. This kernel is most commonly applied to\nhistograms.\n\nThe chi-squared kernel is given by::\n\n k(x, y) = -Sum [(x - y)^2 / (x + y)]\n\nIt can be interpreted as a weighted difference per entry.\n\nRead more in the :ref:`User Guide `.\n\nNotes\n-----\nAs the negative of a distance, this kernel is only conditionally positive\ndefinite.\n\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n If `None`, uses `Y=X`.\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\nchi2_kernel : The exponentiated version of the kernel, which is usually\n preferable.\nsklearn.kernel_approximation.AdditiveChi2Sampler : A Fourier approximation\n to this kernel.\n\nReferences\n----------\n* Zhang, J. and Marszalek, M. and Lazebnik, S. 
and Schmid, C.\n Local features and kernels for classification of texture and object\n categories: A comprehensive study\n International Journal of Computer Vision 2007\n https://research.microsoft.com/en-us/um/people/manik/projects/trade-off/papers/ZhangIJCV06.pdf" }, { @@ -181226,7 +181226,7 @@ "docstring": { "type": "bool or 'allow-nan'", "default_value": "True", - "description": "Whether to raise an error on np.inf, np.nan, pd.NA in array. The\npossibilities are:\n\n- True: Force all values of array to be finite.\n- False: accepts np.inf, np.nan, pd.NA in array.\n- 'allow-nan': accepts only np.nan and pd.NA values in array. Values\ncannot be infinite.\n\n.. versionadded:: 0.22\n``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\nAccepts `pd.NA` and converts it into `np.nan`." + "description": "Whether to raise an error on np.inf, np.nan, pd.NA in array. The\npossibilities are:\n\n- True: Force all values of array to be finite.\n- False: accepts np.inf, np.nan, pd.NA in array.\n- 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n.. versionadded:: 0.22\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`." }, "type": { "kind": "UnionType", @@ -181327,7 +181327,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Computes the exponential chi-squared kernel X and Y.\n\nThe chi-squared kernel is computed between each pair of rows in X and Y. X\nand Y have to be non-negative. This kernel is most commonly applied to\nhistograms.\n\nThe chi-squared kernel is given by::\n\nk(x, y) = exp(-gamma Sum [(x - y)^2 / (x + y)])\n\nIt can be interpreted as a weighted difference per entry.\n\nRead more in the :ref:`User Guide `.", + "description": "Computes the exponential chi-squared kernel X and Y.\n\nThe chi-squared kernel is computed between each pair of rows in X and Y. X\nand Y have to be non-negative. This kernel is most commonly applied to\nhistograms.\n\nThe chi-squared kernel is given by::\n\n k(x, y) = exp(-gamma Sum [(x - y)^2 / (x + y)])\n\nIt can be interpreted as a weighted difference per entry.\n\nRead more in the :ref:`User Guide `.", "docstring": "Computes the exponential chi-squared kernel X and Y.\n\nThe chi-squared kernel is computed between each pair of rows in X and Y. X\nand Y have to be non-negative. This kernel is most commonly applied to\nhistograms.\n\nThe chi-squared kernel is given by::\n\n k(x, y) = exp(-gamma Sum [(x - y)^2 / (x + y)])\n\nIt can be interpreted as a weighted difference per entry.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ngamma : float, default=1.\n Scaling parameter of the chi2 kernel.\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)\n\nSee Also\n--------\nadditive_chi2_kernel : The additive version of this kernel.\nsklearn.kernel_approximation.AdditiveChi2Sampler : A Fourier approximation\n to the additive version of this kernel.\n\nReferences\n----------\n* Zhang, J. and Marszalek, M. and Lazebnik, S. 
and Schmid, C.\n Local features and kernels for classification of texture and object\n categories: A comprehensive study\n International Journal of Computer Vision 2007\n https://research.microsoft.com/en-us/um/people/manik/projects/trade-off/papers/ZhangIJCV06.pdf" }, { @@ -181463,7 +181463,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "Whether to return dense output even when the input is sparse. If\n``False``, the output is sparse if both input arrays are sparse.\n\n.. versionadded:: 0.17\nparameter ``dense_output`` for dense output." + "description": "Whether to return dense output even when the input is sparse. If\n``False``, the output is sparse if both input arrays are sparse.\n\n.. versionadded:: 0.17\n parameter ``dense_output`` for dense output." }, "type": { "kind": "NamedType", @@ -181474,7 +181474,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Compute cosine similarity between samples in X and Y.\n\nCosine similarity, or the cosine kernel, computes similarity as the\nnormalized dot product of X and Y:\n\nK(X, Y) = / (||X||*||Y||)\n\nOn L2-normalized data, this function is equivalent to linear_kernel.\n\nRead more in the :ref:`User Guide `.", + "description": "Compute cosine similarity between samples in X and Y.\n\nCosine similarity, or the cosine kernel, computes similarity as the\nnormalized dot product of X and Y:\n\n K(X, Y) = / (||X||*||Y||)\n\nOn L2-normalized data, this function is equivalent to linear_kernel.\n\nRead more in the :ref:`User Guide `.", "docstring": "Compute cosine similarity between samples in X and Y.\n\nCosine similarity, or the cosine kernel, computes similarity as the\nnormalized dot product of X and Y:\n\n K(X, Y) = / (||X||*||Y||)\n\nOn L2-normalized data, this function is equivalent to linear_kernel.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {ndarray, sparse matrix} of shape (n_samples_X, n_features)\n Input data.\n\nY : {ndarray, sparse matrix} of shape (n_samples_Y, n_features), default=None\n Input data. If ``None``, the output will be the pairwise\n similarities between all samples in ``X``.\n\ndense_output : bool, default=True\n Whether to return dense output even when the input is sparse. If\n ``False``, the output is sparse if both input arrays are sparse.\n\n .. 
versionadded:: 0.17\n parameter ``dense_output`` for dense output.\n\nReturns\n-------\nkernel matrix : ndarray of shape (n_samples_X, n_samples_Y)" }, { @@ -181602,7 +181602,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Compute the distance matrix between each pair from a vector array X and Y.\n\nFor efficiency reasons, the euclidean distance between a pair of row\nvector x and y is computed as::\n\ndist(x, y) = sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y))\n\nThis formulation has two advantages over other ways of computing distances.\nFirst, it is computationally efficient when dealing with sparse data.\nSecond, if one argument varies but the other remains unchanged, then\n`dot(x, x)` and/or `dot(y, y)` can be pre-computed.\n\nHowever, this is not the most precise way of doing this computation,\nbecause this equation potentially suffers from \"catastrophic cancellation\".\nAlso, the distance matrix returned by this function may not be exactly\nsymmetric as required by, e.g., ``scipy.spatial.distance`` functions.\n\nRead more in the :ref:`User Guide `.", + "description": "Compute the distance matrix between each pair from a vector array X and Y.\n\nFor efficiency reasons, the euclidean distance between a pair of row\nvector x and y is computed as::\n\n dist(x, y) = sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y))\n\nThis formulation has two advantages over other ways of computing distances.\nFirst, it is computationally efficient when dealing with sparse data.\nSecond, if one argument varies but the other remains unchanged, then\n`dot(x, x)` and/or `dot(y, y)` can be pre-computed.\n\nHowever, this is not the most precise way of doing this computation,\nbecause this equation potentially suffers from \"catastrophic cancellation\".\nAlso, the distance matrix returned by this function may not be exactly\nsymmetric as required by, e.g., ``scipy.spatial.distance`` functions.\n\nRead more in the :ref:`User Guide `.", "docstring": "Compute the distance matrix between each pair from a vector array X and Y.\n\nFor efficiency reasons, the euclidean distance between a pair of row\nvector x and y is computed as::\n\n dist(x, y) = sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y))\n\nThis formulation has two advantages over other ways of computing distances.\nFirst, it is computationally efficient when dealing with sparse data.\nSecond, if one argument varies but the other remains unchanged, then\n`dot(x, x)` and/or `dot(y, y)` can be pre-computed.\n\nHowever, this is not the most precise way of doing this computation,\nbecause this equation potentially suffers from \"catastrophic cancellation\".\nAlso, the distance matrix returned by this function may not be exactly\nsymmetric as required by, e.g., ``scipy.spatial.distance`` functions.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n An array where each row is a sample and each column is a feature.\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features), default=None\n An array where each row is a sample and each column is a feature.\n If `None`, method uses `Y=X`.\n\nY_norm_squared : array-like of shape (n_samples_Y,) or (n_samples_Y, 1) or (1, n_samples_Y), default=None\n Pre-computed dot-products of vectors in Y (e.g.,\n ``(Y**2).sum(axis=1)``)\n May be ignored in some cases, see the note below.\n\nsquared : bool, default=False\n Return squared Euclidean distances.\n\nX_norm_squared : array-like of shape 
(n_samples_X,) or (n_samples_X, 1) or (1, n_samples_X), default=None\n Pre-computed dot-products of vectors in X (e.g.,\n ``(X**2).sum(axis=1)``)\n May be ignored in some cases, see the note below.\n\nReturns\n-------\ndistances : ndarray of shape (n_samples_X, n_samples_Y)\n Returns the distances between the row vectors of `X`\n and the row vectors of `Y`.\n\nSee Also\n--------\npaired_distances : Distances betweens pairs of elements of X and Y.\n\nNotes\n-----\nTo achieve a better accuracy, `X_norm_squared`\u00a0and `Y_norm_squared` may be\nunused if they are passed as `np.float32`.\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import euclidean_distances\n>>> X = [[0, 1], [1, 1]]\n>>> # distance between rows of X\n>>> euclidean_distances(X, X)\narray([[0., 1.],\n [1., 0.]])\n>>> # get distance to origin\n>>> euclidean_distances(X, [[0, 0]])\narray([[1. ],\n [1.41421356]])" }, { @@ -181649,7 +181649,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Compute the Haversine distance between samples in X and Y.\n\nThe Haversine (or great circle) distance is the angular distance between\ntwo points on the surface of a sphere. The first coordinate of each point\nis assumed to be the latitude, the second is the longitude, given\nin radians. The dimension of the data must be 2.\n\n.. math::\nD(x, y) = 2\\arcsin[\\sqrt{\\sin^2((x1 - y1) / 2)\n+ \\cos(x1)\\cos(y1)\\sin^2((x2 - y2) / 2)}]", + "description": "Compute the Haversine distance between samples in X and Y.\n\nThe Haversine (or great circle) distance is the angular distance between\ntwo points on the surface of a sphere. The first coordinate of each point\nis assumed to be the latitude, the second is the longitude, given\nin radians. The dimension of the data must be 2.\n\n.. math::\n D(x, y) = 2\\arcsin[\\sqrt{\\sin^2((x1 - y1) / 2)\n + \\cos(x1)\\cos(y1)\\sin^2((x2 - y2) / 2)}]", "docstring": "Compute the Haversine distance between samples in X and Y.\n\nThe Haversine (or great circle) distance is the angular distance between\ntwo points on the surface of a sphere. The first coordinate of each point\nis assumed to be the latitude, the second is the longitude, given\nin radians. The dimension of the data must be 2.\n\n.. math::\n D(x, y) = 2\\arcsin[\\sqrt{\\sin^2((x1 - y1) / 2)\n + \\cos(x1)\\cos(y1)\\sin^2((x2 - y2) / 2)}]\n\nParameters\n----------\nX : array-like of shape (n_samples_X, 2)\n A feature array.\n\nY : array-like of shape (n_samples_Y, 2), default=None\n An optional second feature array. If `None`, uses `Y=X`.\n\nReturns\n-------\ndistance : ndarray of shape (n_samples_X, n_samples_Y)\n The distance matrix.\n\nNotes\n-----\nAs the Earth is nearly spherical, the haversine formula provides a good\napproximation of the distance between two points of the Earth surface, with\na less than 1% error on average.\n\nExamples\n--------\nWe want to calculate the distance between the Ezeiza Airport\n(Buenos Aires, Argentina) and the Charles de Gaulle Airport (Paris,\nFrance).\n\n>>> from sklearn.metrics.pairwise import haversine_distances\n>>> from math import radians\n>>> bsas = [-34.83333, -58.5166646]\n>>> paris = [49.0083899664, 2.53844117956]\n>>> bsas_in_radians = [radians(_) for _ in bsas]\n>>> paris_in_radians = [radians(_) for _ in paris]\n>>> result = haversine_distances([bsas_in_radians, paris_in_radians])\n>>> result * 6371000/1000 # multiply by Earth radius to get kilometers\narray([[ 0. , 11099.54035582],\n [11099.54035582, 0. 
]])" }, { @@ -181661,7 +181661,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Valid metrics for pairwise_kernels.\n\nThis function simply returns the valid pairwise distance metrics.\nIt exists, however, to allow for a verbose description of the mapping for\neach of the valid strings.\n\nThe valid distance metrics, and the function they map to, are:\n=============== ========================================\nmetric Function\n=============== ========================================\n'additive_chi2' sklearn.pairwise.additive_chi2_kernel\n'chi2' sklearn.pairwise.chi2_kernel\n'linear' sklearn.pairwise.linear_kernel\n'poly' sklearn.pairwise.polynomial_kernel\n'polynomial' sklearn.pairwise.polynomial_kernel\n'rbf' sklearn.pairwise.rbf_kernel\n'laplacian' sklearn.pairwise.laplacian_kernel\n'sigmoid' sklearn.pairwise.sigmoid_kernel\n'cosine' sklearn.pairwise.cosine_similarity\n=============== ========================================\n\nRead more in the :ref:`User Guide `.", + "description": "Valid metrics for pairwise_kernels.\n\nThis function simply returns the valid pairwise distance metrics.\nIt exists, however, to allow for a verbose description of the mapping for\neach of the valid strings.\n\nThe valid distance metrics, and the function they map to, are:\n =============== ========================================\n metric Function\n =============== ========================================\n 'additive_chi2' sklearn.pairwise.additive_chi2_kernel\n 'chi2' sklearn.pairwise.chi2_kernel\n 'linear' sklearn.pairwise.linear_kernel\n 'poly' sklearn.pairwise.polynomial_kernel\n 'polynomial' sklearn.pairwise.polynomial_kernel\n 'rbf' sklearn.pairwise.rbf_kernel\n 'laplacian' sklearn.pairwise.laplacian_kernel\n 'sigmoid' sklearn.pairwise.sigmoid_kernel\n 'cosine' sklearn.pairwise.cosine_similarity\n =============== ========================================\n\nRead more in the :ref:`User Guide `.", "docstring": "Valid metrics for pairwise_kernels.\n\nThis function simply returns the valid pairwise distance metrics.\nIt exists, however, to allow for a verbose description of the mapping for\neach of the valid strings.\n\nThe valid distance metrics, and the function they map to, are:\n =============== ========================================\n metric Function\n =============== ========================================\n 'additive_chi2' sklearn.pairwise.additive_chi2_kernel\n 'chi2' sklearn.pairwise.chi2_kernel\n 'linear' sklearn.pairwise.linear_kernel\n 'poly' sklearn.pairwise.polynomial_kernel\n 'polynomial' sklearn.pairwise.polynomial_kernel\n 'rbf' sklearn.pairwise.rbf_kernel\n 'laplacian' sklearn.pairwise.laplacian_kernel\n 'sigmoid' sklearn.pairwise.sigmoid_kernel\n 'cosine' sklearn.pairwise.cosine_similarity\n =============== ========================================\n\nRead more in the :ref:`User Guide `." }, { @@ -181725,7 +181725,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Compute the laplacian kernel between X and Y.\n\nThe laplacian kernel is defined as::\n\nK(x, y) = exp(-gamma ||x-y||_1)\n\nfor each pair of rows x in X and y in Y.\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17", + "description": "Compute the laplacian kernel between X and Y.\n\nThe laplacian kernel is defined as::\n\n K(x, y) = exp(-gamma ||x-y||_1)\n\nfor each pair of rows x in X and y in Y.\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.17", "docstring": "Compute the laplacian kernel between X and Y.\n\nThe laplacian kernel is defined as::\n\n K(x, y) = exp(-gamma ||x-y||_1)\n\nfor each pair of rows x in X and y in Y.\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n A feature array.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n An optional second feature array. If `None`, uses `Y=X`.\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)\n The kernel matrix." }, { @@ -181960,7 +181960,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Calculate the euclidean distances in the presence of missing values.\n\nCompute the euclidean distance between each pair of samples in X and Y,\nwhere Y=X is assumed if Y=None. When calculating the distance between a\npair of samples, this formulation ignores feature coordinates with a\nmissing value in either sample and scales up the weight of the remaining\ncoordinates:\n\ndist(x,y) = sqrt(weight * sq. distance from present coordinates)\nwhere,\nweight = Total # of coordinates / # of present coordinates\n\nFor example, the distance between ``[3, na, na, 6]`` and ``[1, na, 4, 5]``\nis:\n\n.. math::\n\\sqrt{\\frac{4}{2}((3-1)^2 + (6-5)^2)}\n\nIf all the coordinates are missing or if there are no common present\ncoordinates then NaN is returned for that pair.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22", + "description": "Calculate the euclidean distances in the presence of missing values.\n\nCompute the euclidean distance between each pair of samples in X and Y,\nwhere Y=X is assumed if Y=None. When calculating the distance between a\npair of samples, this formulation ignores feature coordinates with a\nmissing value in either sample and scales up the weight of the remaining\ncoordinates:\n\n dist(x,y) = sqrt(weight * sq. distance from present coordinates)\n where,\n weight = Total # of coordinates / # of present coordinates\n\nFor example, the distance between ``[3, na, na, 6]`` and ``[1, na, 4, 5]``\nis:\n\n .. math::\n \\sqrt{\\frac{4}{2}((3-1)^2 + (6-5)^2)}\n\nIf all the coordinates are missing or if there are no common present\ncoordinates then NaN is returned for that pair.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.22", "docstring": "Calculate the euclidean distances in the presence of missing values.\n\nCompute the euclidean distance between each pair of samples in X and Y,\nwhere Y=X is assumed if Y=None. When calculating the distance between a\npair of samples, this formulation ignores feature coordinates with a\nmissing value in either sample and scales up the weight of the remaining\ncoordinates:\n\n dist(x,y) = sqrt(weight * sq. distance from present coordinates)\n where,\n weight = Total # of coordinates / # of present coordinates\n\nFor example, the distance between ``[3, na, na, 6]`` and ``[1, na, 4, 5]``\nis:\n\n .. math::\n \\sqrt{\\frac{4}{2}((3-1)^2 + (6-5)^2)}\n\nIf all the coordinates are missing or if there are no common present\ncoordinates then NaN is returned for that pair.\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.22\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n An array where each row is a sample and each column is a feature.\n\nY : array-like of shape (n_samples_Y, n_features), default=None\n An array where each row is a sample and each column is a feature.\n If `None`, method uses `Y=X`.\n\nsquared : bool, default=False\n Return squared Euclidean distances.\n\nmissing_values : np.nan or int, default=np.nan\n Representation of missing value.\n\ncopy : bool, default=True\n Make and use a deep copy of X and Y (if Y exists).\n\nReturns\n-------\ndistances : ndarray of shape (n_samples_X, n_samples_Y)\n Returns the distances between the row vectors of `X`\n and the row vectors of `Y`.\n\nSee Also\n--------\npaired_distances : Distances between pairs of elements of X and Y.\n\nReferences\n----------\n* John K. Dixon, \"Pattern Recognition with Partly Missing Data\",\n IEEE Transactions on Systems, Man, and Cybernetics, Volume: 9, Issue:\n 10, pp. 617 - 621, Oct. 1979.\n http://ieeexplore.ieee.org/abstract/document/4310090/\n\nExamples\n--------\n>>> from sklearn.metrics.pairwise import nan_euclidean_distances\n>>> nan = float(\"NaN\")\n>>> X = [[0, 1], [1, nan]]\n>>> nan_euclidean_distances(X, X) # distance between rows of X\narray([[0. , 1.41421356],\n [1.41421356, 0. ]])\n\n>>> # get distance to origin\n>>> nan_euclidean_distances(X, [[0, 0]])\narray([[1. ],\n [1.41421356]])" }, { @@ -182287,7 +182287,7 @@ "docstring": { "type": "bool or 'allow-nan'", "default_value": "True", - "description": "Whether to raise an error on np.inf, np.nan, pd.NA in array. Ignored\nfor a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``. The\npossibilities are:\n\n- True: Force all values of array to be finite.\n- False: accepts np.inf, np.nan, pd.NA in array.\n- 'allow-nan': accepts only np.nan and pd.NA values in array. Values\ncannot be infinite.\n\n.. versionadded:: 0.22\n``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\nAccepts `pd.NA` and converts it into `np.nan`." + "description": "Whether to raise an error on np.inf, np.nan, pd.NA in array. Ignored\nfor a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``. The\npossibilities are:\n\n- True: Force all values of array to be finite.\n- False: accepts np.inf, np.nan, pd.NA in array.\n- 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n.. versionadded:: 0.22\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`." }, "type": { "kind": "UnionType", @@ -182324,7 +182324,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Compute the distance matrix from a vector array X and optional Y.\n\nThis method takes either a vector array or a distance matrix, and returns\na distance matrix. If the input is a vector array, the distances are\ncomputed. If the input is a distances matrix, it is returned instead.\n\nThis method provides a safe way to take a distance matrix as input, while\npreserving compatibility with many other algorithms that take a vector\narray.\n\nIf Y is given (default is None), then the returned matrix is the pairwise\ndistance between the arrays from both X and Y.\n\nValid values for metric are:\n\n- From scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n'manhattan']. 
These metrics support sparse matrix\ninputs.\n['nan_euclidean'] but it does not yet support sparse matrices.\n\n- From scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',\n'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',\n'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']\nSee the documentation for scipy.spatial.distance for details on these\nmetrics. These metrics do not support sparse matrix inputs.\n\nNote that in the case of 'cityblock', 'cosine' and 'euclidean' (which are\nvalid scipy.spatial.distance metrics), the scikit-learn implementation\nwill be used, which is faster and has support for sparse matrices (except\nfor 'cityblock'). For a verbose description of the metrics from\nscikit-learn, see the __doc__ of the sklearn.pairwise.distance_metrics\nfunction.\n\nRead more in the :ref:`User Guide `.", + "description": "Compute the distance matrix from a vector array X and optional Y.\n\nThis method takes either a vector array or a distance matrix, and returns\na distance matrix. If the input is a vector array, the distances are\ncomputed. If the input is a distances matrix, it is returned instead.\n\nThis method provides a safe way to take a distance matrix as input, while\npreserving compatibility with many other algorithms that take a vector\narray.\n\nIf Y is given (default is None), then the returned matrix is the pairwise\ndistance between the arrays from both X and Y.\n\nValid values for metric are:\n\n- From scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']. These metrics support sparse matrix\n inputs.\n ['nan_euclidean'] but it does not yet support sparse matrices.\n\n- From scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',\n 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',\n 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']\n See the documentation for scipy.spatial.distance for details on these\n metrics. These metrics do not support sparse matrix inputs.\n\nNote that in the case of 'cityblock', 'cosine' and 'euclidean' (which are\nvalid scipy.spatial.distance metrics), the scikit-learn implementation\nwill be used, which is faster and has support for sparse matrices (except\nfor 'cityblock'). For a verbose description of the metrics from\nscikit-learn, see the __doc__ of the sklearn.pairwise.distance_metrics\nfunction.\n\nRead more in the :ref:`User Guide `.", "docstring": "Compute the distance matrix from a vector array X and optional Y.\n\nThis method takes either a vector array or a distance matrix, and returns\na distance matrix. If the input is a vector array, the distances are\ncomputed. If the input is a distances matrix, it is returned instead.\n\nThis method provides a safe way to take a distance matrix as input, while\npreserving compatibility with many other algorithms that take a vector\narray.\n\nIf Y is given (default is None), then the returned matrix is the pairwise\ndistance between the arrays from both X and Y.\n\nValid values for metric are:\n\n- From scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']. 
These metrics support sparse matrix\n inputs.\n ['nan_euclidean'] but it does not yet support sparse matrices.\n\n- From scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',\n 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',\n 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']\n See the documentation for scipy.spatial.distance for details on these\n metrics. These metrics do not support sparse matrix inputs.\n\nNote that in the case of 'cityblock', 'cosine' and 'euclidean' (which are\nvalid scipy.spatial.distance metrics), the scikit-learn implementation\nwill be used, which is faster and has support for sparse matrices (except\nfor 'cityblock'). For a verbose description of the metrics from\nscikit-learn, see the __doc__ of the sklearn.pairwise.distance_metrics\nfunction.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)\n Array of pairwise distances between samples, or a feature array.\n The shape of the array should be (n_samples_X, n_samples_X) if\n metric == \"precomputed\" and (n_samples_X, n_features) otherwise.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n An optional second feature array. Only allowed if\n metric != \"precomputed\".\n\nmetric : str or callable, default='euclidean'\n The metric to use when calculating distance between instances in a\n feature array. If metric is a string, it must be one of the options\n allowed by scipy.spatial.distance.pdist for its metric parameter, or\n a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``.\n If metric is \"precomputed\", X is assumed to be a distance matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays from X as input and return a value indicating\n the distance between them.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\nforce_all_finite : bool or 'allow-nan', default=True\n Whether to raise an error on np.inf, np.nan, pd.NA in array. Ignored\n for a metric listed in ``pairwise.PAIRWISE_DISTANCE_FUNCTIONS``. The\n possibilities are:\n\n - True: Force all values of array to be finite.\n - False: accepts np.inf, np.nan, pd.NA in array.\n - 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n .. versionadded:: 0.22\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n .. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`.\n\n**kwds : optional keyword parameters\n Any further parameters are passed directly to the distance function.\n If using a scipy.spatial.distance metric, the parameters are still\n metric dependent. 
See the scipy docs for usage examples.\n\nReturns\n-------\nD : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_samples_Y)\n A distance matrix D such that D_{i, j} is the distance between the\n ith and jth vectors of the given matrix X, if Y is None.\n If Y is not None, then D_{i, j} is the distance between the ith array\n from X and the jth array from Y.\n\nSee Also\n--------\npairwise_distances_chunked : Performs the same calculation as this\n function, but returns a generator of chunks of the distance matrix, in\n order to limit memory usage.\npaired_distances : Computes the distances between corresponding elements\n of two arrays." }, { @@ -182394,7 +182394,7 @@ "docstring": { "type": "str or callable", "default_value": "\"euclidean\"", - "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nDistance matrices are not supported.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." + "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nDistance matrices are not supported.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." 
}, "type": { "kind": "UnionType", @@ -182431,7 +182431,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance).\n\nThis is mostly equivalent to calling:\n\npairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis)\n\nbut uses much less memory, and is faster for large arrays.\n\nThis function works with dense 2D arrays only.", + "description": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance).\n\nThis is mostly equivalent to calling:\n\n pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis)\n\nbut uses much less memory, and is faster for large arrays.\n\nThis function works with dense 2D arrays only.", "docstring": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance).\n\nThis is mostly equivalent to calling:\n\n pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis)\n\nbut uses much less memory, and is faster for large arrays.\n\nThis function works with dense 2D arrays only.\n\nParameters\n----------\nX : array-like of shape (n_samples_X, n_features)\n Array containing points.\n\nY : array-like of shape (n_samples_Y, n_features)\n Arrays containing points.\n\naxis : int, default=1\n Axis along which the argmin and distances are to be computed.\n\nmetric : str or callable, default=\"euclidean\"\n Metric to use for distance computation. Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\nmetric_kwargs : dict, default=None\n Keyword arguments to pass to specified metric function.\n\nReturns\n-------\nargmin : numpy.ndarray\n Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\nSee Also\n--------\nsklearn.metrics.pairwise_distances\nsklearn.metrics.pairwise_distances_argmin_min" }, { @@ -182519,7 +182519,7 @@ "docstring": { "type": "str or callable", "default_value": "'euclidean'", - "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. 
This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nDistance matrices are not supported.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." + "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nDistance matrices are not supported.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." }, "type": { "kind": "UnionType", @@ -182556,7 +182556,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance). The minimal distances are\nalso returned.\n\nThis is mostly equivalent to calling:\n\n(pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis),\npairwise_distances(X, Y=Y, metric=metric).min(axis=axis))\n\nbut uses much less memory, and is faster for large arrays.", + "description": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance). The minimal distances are\nalso returned.\n\nThis is mostly equivalent to calling:\n\n (pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis),\n pairwise_distances(X, Y=Y, metric=metric).min(axis=axis))\n\nbut uses much less memory, and is faster for large arrays.", "docstring": "Compute minimum distances between one point and a set of points.\n\nThis function computes for each row in X, the index of the row of Y which\nis closest (according to the specified distance). The minimal distances are\nalso returned.\n\nThis is mostly equivalent to calling:\n\n (pairwise_distances(X, Y=Y, metric=metric).argmin(axis=axis),\n pairwise_distances(X, Y=Y, metric=metric).min(axis=axis))\n\nbut uses much less memory, and is faster for large arrays.\n\nParameters\n----------\nX : {array-like, sparse matrix} of shape (n_samples_X, n_features)\n Array containing points.\n\nY : {array-like, sparse matrix} of shape (n_samples_Y, n_features)\n Array containing points.\n\naxis : int, default=1\n Axis along which the argmin and distances are to be computed.\n\nmetric : str or callable, default='euclidean'\n Metric to use for distance computation. 
Any metric from scikit-learn\n or scipy.spatial.distance can be used.\n\n If metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two arrays as input and return one value indicating the\n distance between them. This works for Scipy's metrics, but is less\n efficient than passing the metric name as a string.\n\n Distance matrices are not supported.\n\n Valid values for metric are:\n\n - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\n See the documentation for scipy.spatial.distance for details on these\n metrics.\n\nmetric_kwargs : dict, default=None\n Keyword arguments to pass to specified metric function.\n\nReturns\n-------\nargmin : ndarray\n Y[argmin[i], :] is the row in Y that is closest to X[i, :].\n\ndistances : ndarray\n distances[i] is the distance between the i-th row in X and the\n argmin[i]-th row in Y.\n\nSee Also\n--------\nsklearn.metrics.pairwise_distances\nsklearn.metrics.pairwise_distances_argmin" }, { @@ -182818,7 +182818,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.metrics"], - "description": "Compute the kernel between arrays X and optional array Y.\n\nThis method takes either a vector array or a kernel matrix, and returns\na kernel matrix. If the input is a vector array, the kernels are\ncomputed. If the input is a kernel matrix, it is returned instead.\n\nThis method provides a safe way to take a kernel matrix as input, while\npreserving compatibility with many other algorithms that take a vector\narray.\n\nIf Y is given (default is None), then the returned matrix is the pairwise\nkernel between the arrays from both X and Y.\n\nValid values for metric are:\n['additive_chi2', 'chi2', 'linear', 'poly', 'polynomial', 'rbf',\n'laplacian', 'sigmoid', 'cosine']\n\nRead more in the :ref:`User Guide `.", + "description": "Compute the kernel between arrays X and optional array Y.\n\nThis method takes either a vector array or a kernel matrix, and returns\na kernel matrix. If the input is a vector array, the kernels are\ncomputed. If the input is a kernel matrix, it is returned instead.\n\nThis method provides a safe way to take a kernel matrix as input, while\npreserving compatibility with many other algorithms that take a vector\narray.\n\nIf Y is given (default is None), then the returned matrix is the pairwise\nkernel between the arrays from both X and Y.\n\nValid values for metric are:\n ['additive_chi2', 'chi2', 'linear', 'poly', 'polynomial', 'rbf',\n 'laplacian', 'sigmoid', 'cosine']\n\nRead more in the :ref:`User Guide `.", "docstring": "Compute the kernel between arrays X and optional array Y.\n\nThis method takes either a vector array or a kernel matrix, and returns\na kernel matrix. If the input is a vector array, the kernels are\ncomputed. 
If the input is a kernel matrix, it is returned instead.\n\nThis method provides a safe way to take a kernel matrix as input, while\npreserving compatibility with many other algorithms that take a vector\narray.\n\nIf Y is given (default is None), then the returned matrix is the pairwise\nkernel between the arrays from both X and Y.\n\nValid values for metric are:\n ['additive_chi2', 'chi2', 'linear', 'poly', 'polynomial', 'rbf',\n 'laplacian', 'sigmoid', 'cosine']\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_features)\n Array of pairwise kernels between samples, or a feature array.\n The shape of the array should be (n_samples_X, n_samples_X) if\n metric == \"precomputed\" and (n_samples_X, n_features) otherwise.\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n A second feature array only if X has shape (n_samples_X, n_features).\n\nmetric : str or callable, default=\"linear\"\n The metric to use when calculating kernel between instances in a\n feature array. If metric is a string, it must be one of the metrics\n in pairwise.PAIRWISE_KERNEL_FUNCTIONS.\n If metric is \"precomputed\", X is assumed to be a kernel matrix.\n Alternatively, if metric is a callable function, it is called on each\n pair of instances (rows) and the resulting value recorded. The callable\n should take two rows from X as input and return the corresponding\n kernel value as a single number. This means that callables from\n :mod:`sklearn.metrics.pairwise` are not allowed, as they operate on\n matrices, not single samples. Use the string identifying the kernel\n instead.\n\nfilter_params : bool, default=False\n Whether to filter invalid parameters or not.\n\nn_jobs : int, default=None\n The number of jobs to use for the computation. This works by breaking\n down the pairwise matrix into n_jobs even slices and computing them in\n parallel.\n\n ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n ``-1`` means using all processors. See :term:`Glossary `\n for more details.\n\n**kwds : optional keyword parameters\n Any further parameters are passed directly to the kernel function.\n\nReturns\n-------\nK : ndarray of shape (n_samples_X, n_samples_X) or (n_samples_X, n_samples_Y)\n A kernel matrix K such that K_{i, j} is the kernel between the\n ith and jth vectors of the given matrix X, if Y is None.\n If Y is not None, then K_{i, j} is the kernel between the ith array\n from X and the jth array from Y.\n\nNotes\n-----\nIf metric is 'precomputed', Y is ignored and X is returned." 
}, { @@ -182916,7 +182916,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Compute the polynomial kernel between X and Y::\n\nK(X, Y) = (gamma + coef0)^degree\n\nRead more in the :ref:`User Guide `.", + "description": "Compute the polynomial kernel between X and Y::\n\n K(X, Y) = (gamma + coef0)^degree\n\nRead more in the :ref:`User Guide `.", "docstring": "Compute the polynomial kernel between X and Y::\n\n K(X, Y) = (gamma + coef0)^degree\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n\ndegree : int, default=3\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\ncoef0 : float, default=1\n\nReturns\n-------\nGram matrix : ndarray of shape (n_samples_X, n_samples_Y)" }, { @@ -182980,7 +182980,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Compute the rbf (gaussian) kernel between X and Y::\n\nK(x, y) = exp(-gamma ||x-y||^2)\n\nfor each pair of rows x in X and y in Y.\n\nRead more in the :ref:`User Guide `.", + "description": "Compute the rbf (gaussian) kernel between X and Y::\n\n K(x, y) = exp(-gamma ||x-y||^2)\n\nfor each pair of rows x in X and y in Y.\n\nRead more in the :ref:`User Guide `.", "docstring": "Compute the rbf (gaussian) kernel between X and Y::\n\n K(x, y) = exp(-gamma ||x-y||^2)\n\nfor each pair of rows x in X and y in Y.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n If `None`, uses `Y=X`.\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\nReturns\n-------\nkernel_matrix : ndarray of shape (n_samples_X, n_samples_Y)" }, { @@ -183061,7 +183061,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Compute the sigmoid kernel between X and Y::\n\nK(X, Y) = tanh(gamma + coef0)\n\nRead more in the :ref:`User Guide `.", + "description": "Compute the sigmoid kernel between X and Y::\n\n K(X, Y) = tanh(gamma + coef0)\n\nRead more in the :ref:`User Guide `.", "docstring": "Compute the sigmoid kernel between X and Y::\n\n K(X, Y) = tanh(gamma + coef0)\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nX : ndarray of shape (n_samples_X, n_features)\n\nY : ndarray of shape (n_samples_Y, n_features), default=None\n If `None`, uses `Y=X`.\n\ngamma : float, default=None\n If None, defaults to 1.0 / n_features.\n\ncoef0 : float, default=1\n\nReturns\n-------\nGram matrix : ndarray of shape (n_samples_X, n_samples_Y)" }, { @@ -184421,11 +184421,11 @@ "docstring": { "type": "{'full', 'tied', 'diag', 'spherical'}", "default_value": "'full'", - "description": "String describing the type of covariance parameters to use.\nMust be one of::\n\n'full' (each component has its own general covariance matrix),\n'tied' (all components share the same general covariance matrix),\n'diag' (each component has its own diagonal covariance matrix),\n'spherical' (each component has its own single variance)." + "description": "String describing the type of covariance parameters to use.\nMust be one of::\n\n 'full' (each component has its own general covariance matrix),\n 'tied' (all components share the same general covariance matrix),\n 'diag' (each component has its own diagonal covariance matrix),\n 'spherical' (each component has its own single variance)." 
}, "type": { "kind": "EnumType", - "values": ["diag", "spherical", "full", "tied"] + "values": ["full", "spherical", "diag", "tied"] } }, { @@ -184506,11 +184506,11 @@ "docstring": { "type": "{'kmeans', 'k-means++', 'random', 'random_from_data'}", "default_value": "'kmeans'", - "description": "The method used to initialize the weights, the means and the\ncovariances.\nString must be one of:\n\n'kmeans' : responsibilities are initialized using kmeans.\n'k-means++' : use the k-means++ method to initialize.\n'random' : responsibilities are initialized randomly.\n'random_from_data' : initial means are randomly selected data points.\n\n.. versionchanged:: v1.1\n`init_params` now accepts 'random_from_data' and 'k-means++' as\ninitialization methods." + "description": "The method used to initialize the weights, the means and the\ncovariances.\nString must be one of:\n\n 'kmeans' : responsibilities are initialized using kmeans.\n 'k-means++' : use the k-means++ method to initialize.\n 'random' : responsibilities are initialized randomly.\n 'random_from_data' : initial means are randomly selected data points.\n\n.. versionchanged:: v1.1\n `init_params` now accepts 'random_from_data' and 'k-means++' as\n initialization methods." }, "type": { "kind": "EnumType", - "values": ["k-means++", "kmeans", "random", "random_from_data"] + "values": ["random", "random_from_data", "kmeans", "k-means++"] } }, { @@ -184523,7 +184523,7 @@ "docstring": { "type": "str", "default_value": "'dirichlet_process'", - "description": "String describing the type of the weight concentration prior.\nMust be one of::\n\n'dirichlet_process' (using the Stick-breaking representation),\n'dirichlet_distribution' (can favor more uniform weights)." + "description": "String describing the type of the weight concentration prior.\nMust be one of::\n\n 'dirichlet_process' (using the Stick-breaking representation),\n 'dirichlet_distribution' (can favor more uniform weights)." }, "type": { "kind": "NamedType", @@ -184644,7 +184644,7 @@ "docstring": { "type": "float or array-like", "default_value": "None", - "description": "The prior on the covariance distribution (Wishart).\nIf it is None, the emiprical covariance prior is initialized using the\ncovariance of X. The shape depends on `covariance_type`::\n\n(n_features, n_features) if 'full',\n(n_features, n_features) if 'tied',\n(n_features) if 'diag',\nfloat if 'spherical'" + "description": "The prior on the covariance distribution (Wishart).\nIf it is None, the emiprical covariance prior is initialized using the\ncovariance of X. The shape depends on `covariance_type`::\n\n (n_features, n_features) if 'full',\n (n_features, n_features) if 'tied',\n (n_features) if 'diag',\n float if 'spherical'" }, "type": { "kind": "UnionType", @@ -185916,7 +185916,7 @@ }, "type": { "kind": "EnumType", - "values": ["diag", "spherical", "full", "tied"] + "values": ["full", "spherical", "diag", "tied"] } }, { @@ -185997,11 +185997,11 @@ "docstring": { "type": "{'kmeans', 'k-means++', 'random', 'random_from_data'}", "default_value": "'kmeans'", - "description": "The method used to initialize the weights, the means and the\nprecisions.\nString must be one of:\n\n- 'kmeans' : responsibilities are initialized using kmeans.\n- 'k-means++' : use the k-means++ method to initialize.\n- 'random' : responsibilities are initialized randomly.\n- 'random_from_data' : initial means are randomly selected data points.\n\n.. 
versionchanged:: v1.1\n`init_params` now accepts 'random_from_data' and 'k-means++' as\ninitialization methods." + "description": "The method used to initialize the weights, the means and the\nprecisions.\nString must be one of:\n\n- 'kmeans' : responsibilities are initialized using kmeans.\n- 'k-means++' : use the k-means++ method to initialize.\n- 'random' : responsibilities are initialized randomly.\n- 'random_from_data' : initial means are randomly selected data points.\n\n.. versionchanged:: v1.1\n `init_params` now accepts 'random_from_data' and 'k-means++' as\n initialization methods." }, "type": { "kind": "EnumType", - "values": ["k-means++", "kmeans", "random", "random_from_data"] + "values": ["random", "random_from_data", "kmeans", "k-means++"] } }, { @@ -186048,7 +186048,7 @@ "docstring": { "type": "array-like", "default_value": "None", - "description": "The user-provided initial precisions (inverse of the covariance\nmatrices).\nIf it is None, precisions are initialized using the 'init_params'\nmethod.\nThe shape depends on 'covariance_type'::\n\n(n_components,) if 'spherical',\n(n_features, n_features) if 'tied',\n(n_components, n_features) if 'diag',\n(n_components, n_features, n_features) if 'full'" + "description": "The user-provided initial precisions (inverse of the covariance\nmatrices).\nIf it is None, precisions are initialized using the 'init_params'\nmethod.\nThe shape depends on 'covariance_type'::\n\n (n_components,) if 'spherical',\n (n_features, n_features) if 'tied',\n (n_components, n_features) if 'diag',\n (n_components, n_features, n_features) if 'full'" }, "type": { "kind": "NamedType", @@ -186964,7 +186964,7 @@ }, "type": { "kind": "EnumType", - "values": ["diag", "spherical", "full", "tied"] + "values": ["full", "spherical", "diag", "tied"] } }, { @@ -187028,7 +187028,7 @@ }, "type": { "kind": "EnumType", - "values": ["diag", "spherical", "full", "tied"] + "values": ["full", "spherical", "diag", "tied"] } } ], @@ -187495,7 +187495,7 @@ }, "type": { "kind": "EnumType", - "values": ["diag", "spherical", "full", "tied"] + "values": ["full", "spherical", "diag", "tied"] } } ], @@ -187576,7 +187576,7 @@ }, "type": { "kind": "EnumType", - "values": ["diag", "spherical", "full", "tied"] + "values": ["full", "spherical", "diag", "tied"] } } ], @@ -187947,7 +187947,7 @@ "docstring": { "type": "callable", "default_value": "", - "description": "This callback accepts:\n- a list of candidates, where each candidate is a dict of\nparameter settings.\n- an optional `cv` parameter which can be used to e.g.\nevaluate candidates on different dataset splits, or\nevaluate candidates on subsampled data (as done in the\nSucessiveHaling estimators). By default, the original `cv`\nparameter is used, and it is available as a private\n`_checked_cv_orig` attribute.\n- an optional `more_results` dict. Each key will be added to\nthe `cv_results_` attribute. Values should be lists of\nlength `n_candidates`\n\nIt returns a dict of all results so far, formatted like\n``cv_results_``.\n\nImportant note (relevant whether the default cv is used or not):\nin randomized splitters, and unless the random_state parameter of\ncv was set to an int, calling cv.split() multiple times will\nyield different splits. Since cv.split() is called in\nevaluate_candidates, this means that candidates will be evaluated\non different splits each time evaluate_candidates is called. This\nmight be a methodological issue depending on the search strategy\nthat you're implementing. 
To prevent randomized splitters from\nbeing used, you may use _split._yields_constant_splits()" + "description": "This callback accepts:\n - a list of candidates, where each candidate is a dict of\n parameter settings.\n - an optional `cv` parameter which can be used to e.g.\n evaluate candidates on different dataset splits, or\n evaluate candidates on subsampled data (as done in the\n SucessiveHaling estimators). By default, the original `cv`\n parameter is used, and it is available as a private\n `_checked_cv_orig` attribute.\n - an optional `more_results` dict. Each key will be added to\n the `cv_results_` attribute. Values should be lists of\n length `n_candidates`\n\nIt returns a dict of all results so far, formatted like\n``cv_results_``.\n\nImportant note (relevant whether the default cv is used or not):\nin randomized splitters, and unless the random_state parameter of\ncv was set to an int, calling cv.split() multiple times will\nyield different splits. Since cv.split() is called in\nevaluate_candidates, this means that candidates will be evaluated\non different splits each time evaluate_candidates is called. This\nmight be a methodological issue depending on the search strategy\nthat you're implementing. To prevent randomized splitters from\nbeing used, you may use _split._yields_constant_splits()" }, "type": { "kind": "NamedType", @@ -188661,7 +188661,7 @@ "docstring": { "type": "str, callable, list, tuple or dict", "default_value": "None", - "description": "Strategy to evaluate the performance of the cross-validated model on\nthe test set.\n\nIf `scoring` represents a single score, one can use:\n\n- a single string (see :ref:`scoring_parameter`);\n- a callable (see :ref:`scoring`) that returns a single value.\n\nIf `scoring` represents multiple scores, one can use:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where the keys are the metric\nnames and the values are the metric scores;\n- a dictionary with metric names as keys and callables a values.\n\nSee :ref:`multimetric_grid_search` for an example." + "description": "Strategy to evaluate the performance of the cross-validated model on\nthe test set.\n\nIf `scoring` represents a single score, one can use:\n\n- a single string (see :ref:`scoring_parameter`);\n- a callable (see :ref:`scoring`) that returns a single value.\n\nIf `scoring` represents multiple scores, one can use:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where the keys are the metric\n names and the values are the metric scores;\n- a dictionary with metric names as keys and callables a values.\n\nSee :ref:`multimetric_grid_search` for an example." }, "type": { "kind": "UnionType", @@ -188699,7 +188699,7 @@ "docstring": { "type": "int", "default_value": "None", - "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionchanged:: v0.20\n`n_jobs` default changed from 1 to None" + "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. 
versionchanged:: v0.20\n `n_jobs` default changed from 1 to None" }, "type": { "kind": "NamedType", @@ -188716,7 +188716,7 @@ "docstring": { "type": "bool, str, or callable", "default_value": "True", - "description": "Refit an estimator using the best found parameters on the whole\ndataset.\n\nFor multiple metric evaluation, this needs to be a `str` denoting the\nscorer that would be used to find the best parameters for refitting\nthe estimator at the end.\n\nWhere there are considerations other than maximum score in\nchoosing a best estimator, ``refit`` can be set to a function which\nreturns the selected ``best_index_`` given ``cv_results_``. In that\ncase, the ``best_estimator_`` and ``best_params_`` will be set\naccording to the returned ``best_index_`` while the ``best_score_``\nattribute will not be available.\n\nThe refitted estimator is made available at the ``best_estimator_``\nattribute and permits using ``predict`` directly on this\n``GridSearchCV`` instance.\n\nAlso for multiple metric evaluation, the attributes ``best_index_``,\n``best_score_`` and ``best_params_`` will only be available if\n``refit`` is set and all of them will be determined w.r.t this specific\nscorer.\n\nSee ``scoring`` parameter to know more about multiple metric\nevaluation.\n\n.. versionchanged:: 0.20\nSupport for callable added." + "description": "Refit an estimator using the best found parameters on the whole\ndataset.\n\nFor multiple metric evaluation, this needs to be a `str` denoting the\nscorer that would be used to find the best parameters for refitting\nthe estimator at the end.\n\nWhere there are considerations other than maximum score in\nchoosing a best estimator, ``refit`` can be set to a function which\nreturns the selected ``best_index_`` given ``cv_results_``. In that\ncase, the ``best_estimator_`` and ``best_params_`` will be set\naccording to the returned ``best_index_`` while the ``best_score_``\nattribute will not be available.\n\nThe refitted estimator is made available at the ``best_estimator_``\nattribute and permits using ``predict`` directly on this\n``GridSearchCV`` instance.\n\nAlso for multiple metric evaluation, the attributes ``best_index_``,\n``best_score_`` and ``best_params_`` will only be available if\n``refit`` is set and all of them will be determined w.r.t this specific\nscorer.\n\nSee ``scoring`` parameter to know more about multiple metric\nevaluation.\n\n.. versionchanged:: 0.20\n Support for callable added." }, "type": { "kind": "UnionType", @@ -188746,7 +188746,7 @@ "docstring": { "type": "int, cross-validation generator or an iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- integer, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." 
+ "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- integer, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -188776,7 +188776,7 @@ "docstring": { "type": "int", "default_value": "", - "description": "Controls the verbosity: the higher, the more messages.\n\n- >1 : the computation time for each fold and parameter candidate is\ndisplayed;\n- >2 : the score is also displayed;\n- >3 : the fold and candidate parameter indexes are also displayed\ntogether with the starting time of the computation." + "description": "Controls the verbosity: the higher, the more messages.\n\n- >1 : the computation time for each fold and parameter candidate is\n displayed;\n- >2 : the score is also displayed;\n- >3 : the fold and candidate parameter indexes are also displayed\n together with the starting time of the computation." }, "type": { "kind": "NamedType", @@ -188793,7 +188793,7 @@ "docstring": { "type": "int, or str", "default_value": "'2*n_jobs'", - "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n- None, in which case all the jobs are immediately\ncreated and spawned. Use this for lightweight and\nfast-running jobs, to avoid delays due to on-demand\nspawning of the jobs\n\n- An int, giving the exact number of total jobs that are\nspawned\n\n- A str, giving an expression as a function of n_jobs,\nas in '2*n_jobs'" + "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'" }, "type": { "kind": "UnionType", @@ -188845,7 +188845,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "If ``False``, the ``cv_results_`` attribute will not include training\nscores.\nComputing training scores is used to get insights on how different\nparameter settings impact the overfitting/underfitting trade-off.\nHowever computing the scores on the training set can be computationally\nexpensive and is not strictly required to select the parameters that\nyield the best generalization performance.\n\n.. versionadded:: 0.19\n\n.. 
versionchanged:: 0.21\nDefault value was changed from ``True`` to ``False``" + "description": "If ``False``, the ``cv_results_`` attribute will not include training\nscores.\nComputing training scores is used to get insights on how different\nparameter settings impact the overfitting/underfitting trade-off.\nHowever computing the scores on the training set can be computationally\nexpensive and is not strictly required to select the parameters that\nyield the best generalization performance.\n\n.. versionadded:: 0.19\n\n.. versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``" }, "type": { "kind": "NamedType", @@ -189313,7 +189313,7 @@ "docstring": { "type": "str, callable, list, tuple or dict", "default_value": "None", - "description": "Strategy to evaluate the performance of the cross-validated model on\nthe test set.\n\nIf `scoring` represents a single score, one can use:\n\n- a single string (see :ref:`scoring_parameter`);\n- a callable (see :ref:`scoring`) that returns a single value.\n\nIf `scoring` represents multiple scores, one can use:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where the keys are the metric\nnames and the values are the metric scores;\n- a dictionary with metric names as keys and callables a values.\n\nSee :ref:`multimetric_grid_search` for an example.\n\nIf None, the estimator's score method is used." + "description": "Strategy to evaluate the performance of the cross-validated model on\nthe test set.\n\nIf `scoring` represents a single score, one can use:\n\n- a single string (see :ref:`scoring_parameter`);\n- a callable (see :ref:`scoring`) that returns a single value.\n\nIf `scoring` represents multiple scores, one can use:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where the keys are the metric\n names and the values are the metric scores;\n- a dictionary with metric names as keys and callables a values.\n\nSee :ref:`multimetric_grid_search` for an example.\n\nIf None, the estimator's score method is used." }, "type": { "kind": "UnionType", @@ -189351,7 +189351,7 @@ "docstring": { "type": "int", "default_value": "None", - "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionchanged:: v0.20\n`n_jobs` default changed from 1 to None" + "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None" }, "type": { "kind": "NamedType", @@ -189368,7 +189368,7 @@ "docstring": { "type": "bool, str, or callable", "default_value": "True", - "description": "Refit an estimator using the best found parameters on the whole\ndataset.\n\nFor multiple metric evaluation, this needs to be a `str` denoting the\nscorer that would be used to find the best parameters for refitting\nthe estimator at the end.\n\nWhere there are considerations other than maximum score in\nchoosing a best estimator, ``refit`` can be set to a function which\nreturns the selected ``best_index_`` given the ``cv_results``. 
In that\ncase, the ``best_estimator_`` and ``best_params_`` will be set\naccording to the returned ``best_index_`` while the ``best_score_``\nattribute will not be available.\n\nThe refitted estimator is made available at the ``best_estimator_``\nattribute and permits using ``predict`` directly on this\n``RandomizedSearchCV`` instance.\n\nAlso for multiple metric evaluation, the attributes ``best_index_``,\n``best_score_`` and ``best_params_`` will only be available if\n``refit`` is set and all of them will be determined w.r.t this specific\nscorer.\n\nSee ``scoring`` parameter to know more about multiple metric\nevaluation.\n\n.. versionchanged:: 0.20\nSupport for callable added." + "description": "Refit an estimator using the best found parameters on the whole\ndataset.\n\nFor multiple metric evaluation, this needs to be a `str` denoting the\nscorer that would be used to find the best parameters for refitting\nthe estimator at the end.\n\nWhere there are considerations other than maximum score in\nchoosing a best estimator, ``refit`` can be set to a function which\nreturns the selected ``best_index_`` given the ``cv_results``. In that\ncase, the ``best_estimator_`` and ``best_params_`` will be set\naccording to the returned ``best_index_`` while the ``best_score_``\nattribute will not be available.\n\nThe refitted estimator is made available at the ``best_estimator_``\nattribute and permits using ``predict`` directly on this\n``RandomizedSearchCV`` instance.\n\nAlso for multiple metric evaluation, the attributes ``best_index_``,\n``best_score_`` and ``best_params_`` will only be available if\n``refit`` is set and all of them will be determined w.r.t this specific\nscorer.\n\nSee ``scoring`` parameter to know more about multiple metric\nevaluation.\n\n.. versionchanged:: 0.20\n Support for callable added." }, "type": { "kind": "UnionType", @@ -189398,7 +189398,7 @@ "docstring": { "type": "int, cross-validation generator or an iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- integer, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- integer, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. 
versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -189445,7 +189445,7 @@ "docstring": { "type": "int, or str", "default_value": "'2*n_jobs'", - "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n- None, in which case all the jobs are immediately\ncreated and spawned. Use this for lightweight and\nfast-running jobs, to avoid delays due to on-demand\nspawning of the jobs\n\n- An int, giving the exact number of total jobs that are\nspawned\n\n- A str, giving an expression as a function of n_jobs,\nas in '2*n_jobs'" + "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'" }, "type": { "kind": "UnionType", @@ -189527,7 +189527,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "If ``False``, the ``cv_results_`` attribute will not include training\nscores.\nComputing training scores is used to get insights on how different\nparameter settings impact the overfitting/underfitting trade-off.\nHowever computing the scores on the training set can be computationally\nexpensive and is not strictly required to select the parameters that\nyield the best generalization performance.\n\n.. versionadded:: 0.19\n\n.. versionchanged:: 0.21\nDefault value was changed from ``True`` to ``False``" + "description": "If ``False``, the ``cv_results_`` attribute will not include training\nscores.\nComputing training scores is used to get insights on how different\nparameter settings impact the overfitting/underfitting trade-off.\nHowever computing the scores on the training set can be computationally\nexpensive and is not strictly required to select the parameters that\nyield the best generalization performance.\n\n.. versionadded:: 0.19\n\n.. versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``" }, "type": { "kind": "NamedType", @@ -190347,7 +190347,7 @@ "docstring": { "type": "{'exhaust', 'smallest'} or int", "default_value": "'exhaust'", - "description": "The minimum amount of resource that any candidate is allowed to use\nfor a given iteration. Equivalently, this defines the amount of\nresources `r0` that are allocated for each candidate at the first\niteration.\n\n- 'smallest' is a heuristic that sets `r0` to a small value:\n\n- ``n_splits * 2`` when ``resource='n_samples'`` for a regression\nproblem\n- ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\nclassification problem\n- ``1`` when ``resource != 'n_samples'``\n\n- 'exhaust' will set `r0` such that the **last** iteration uses as\nmuch resources as possible. Namely, the last iteration will use the\nhighest value smaller than ``max_resources`` that is a multiple of\nboth ``min_resources`` and ``factor``. 
In general, using 'exhaust'\nleads to a more accurate estimator, but is slightly more time\nconsuming.\n\nNote that the amount of resources used at each iteration is always a\nmultiple of ``min_resources``." + "description": "The minimum amount of resource that any candidate is allowed to use\nfor a given iteration. Equivalently, this defines the amount of\nresources `r0` that are allocated for each candidate at the first\niteration.\n\n- 'smallest' is a heuristic that sets `r0` to a small value:\n\n - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n problem\n - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n classification problem\n - ``1`` when ``resource != 'n_samples'``\n\n- 'exhaust' will set `r0` such that the **last** iteration uses as\n much resources as possible. Namely, the last iteration will use the\n highest value smaller than ``max_resources`` that is a multiple of\n both ``min_resources`` and ``factor``. In general, using 'exhaust'\n leads to a more accurate estimator, but is slightly more time\n consuming.\n\nNote that the amount of resources used at each iteration is always a\nmultiple of ``min_resources``." }, "type": { "kind": "UnionType", @@ -190390,7 +190390,7 @@ "docstring": { "type": "int, cross-validation generator or iterable", "default_value": "5", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- integer, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. note::\nDue to implementation details, the folds produced by `cv` must be\nthe same across multiple calls to `cv.split()`. For\nbuilt-in `scikit-learn` iterators, this can be achieved by\ndeactivating shuffling (`shuffle=False`), or by setting the\n`cv`'s `random_state` parameter to an integer." + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- integer, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. note::\n Due to implementation details, the folds produced by `cv` must be\n the same across multiple calls to `cv.split()`. For\n built-in `scikit-learn` iterators, this can be achieved by\n deactivating shuffling (`shuffle=False`), or by setting the\n `cv`'s `random_state` parameter to an integer." 
}, "type": { "kind": "UnionType", @@ -190577,7 +190577,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Search over specified parameter values with successive halving.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using\nmore and more resources.\n\nRead more in the :ref:`User guide `.\n\n.. note::\n\nThis estimator is still **experimental** for now: the predictions\nand the API might change without any deprecation cycle. To use it,\nyou need to explicitly import ``enable_halving_search_cv``::\n\n>>> # explicitly require this experimental feature\n>>> from sklearn.experimental import enable_halving_search_cv # noqa\n>>> # now you can import normally from model_selection\n>>> from sklearn.model_selection import HalvingGridSearchCV", + "description": "Search over specified parameter values with successive halving.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using\nmore and more resources.\n\nRead more in the :ref:`User guide `.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingGridSearchCV", "docstring": "" }, { @@ -190757,7 +190757,7 @@ "docstring": { "type": "{'exhaust', 'smallest'} or int", "default_value": "'smallest'", - "description": "The minimum amount of resource that any candidate is allowed to use\nfor a given iteration. Equivalently, this defines the amount of\nresources `r0` that are allocated for each candidate at the first\niteration.\n\n- 'smallest' is a heuristic that sets `r0` to a small value:\n\n- ``n_splits * 2`` when ``resource='n_samples'`` for a regression\nproblem\n- ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\nclassification problem\n- ``1`` when ``resource != 'n_samples'``\n\n- 'exhaust' will set `r0` such that the **last** iteration uses as\nmuch resources as possible. Namely, the last iteration will use the\nhighest value smaller than ``max_resources`` that is a multiple of\nboth ``min_resources`` and ``factor``. In general, using 'exhaust'\nleads to a more accurate estimator, but is slightly more time\nconsuming. 'exhaust' isn't available when `n_candidates='exhaust'`.\n\nNote that the amount of resources used at each iteration is always a\nmultiple of ``min_resources``." + "description": "The minimum amount of resource that any candidate is allowed to use\nfor a given iteration. Equivalently, this defines the amount of\nresources `r0` that are allocated for each candidate at the first\niteration.\n\n- 'smallest' is a heuristic that sets `r0` to a small value:\n\n - ``n_splits * 2`` when ``resource='n_samples'`` for a regression\n problem\n - ``n_classes * n_splits * 2`` when ``resource='n_samples'`` for a\n classification problem\n - ``1`` when ``resource != 'n_samples'``\n\n- 'exhaust' will set `r0` such that the **last** iteration uses as\n much resources as possible. Namely, the last iteration will use the\n highest value smaller than ``max_resources`` that is a multiple of\n both ``min_resources`` and ``factor``. 
In general, using 'exhaust'\n leads to a more accurate estimator, but is slightly more time\n consuming. 'exhaust' isn't available when `n_candidates='exhaust'`.\n\nNote that the amount of resources used at each iteration is always a\nmultiple of ``min_resources``." }, "type": { "kind": "UnionType", @@ -190800,7 +190800,7 @@ "docstring": { "type": "int, cross-validation generator or an iterable", "default_value": "5", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- integer, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. note::\nDue to implementation details, the folds produced by `cv` must be\nthe same across multiple calls to `cv.split()`. For\nbuilt-in `scikit-learn` iterators, this can be achieved by\ndeactivating shuffling (`shuffle=False`), or by setting the\n`cv`'s `random_state` parameter to an integer." + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- integer, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. note::\n Due to implementation details, the folds produced by `cv` must be\n the same across multiple calls to `cv.split()`. For\n built-in `scikit-learn` iterators, this can be achieved by\n deactivating shuffling (`shuffle=False`), or by setting the\n `cv`'s `random_state` parameter to an integer." }, "type": { "kind": "UnionType", @@ -190987,7 +190987,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Randomized search on hyper parameters.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using more\nand more resources.\n\nThe candidates are sampled at random from the parameter space and the\nnumber of sampled candidates is determined by ``n_candidates``.\n\nRead more in the :ref:`User guide`.\n\n.. note::\n\nThis estimator is still **experimental** for now: the predictions\nand the API might change without any deprecation cycle. 
To use it,\nyou need to explicitly import ``enable_halving_search_cv``::\n\n>>> # explicitly require this experimental feature\n>>> from sklearn.experimental import enable_halving_search_cv # noqa\n>>> # now you can import normally from model_selection\n>>> from sklearn.model_selection import HalvingRandomSearchCV", + "description": "Randomized search on hyper parameters.\n\nThe search strategy starts evaluating all the candidates with a small\namount of resources and iteratively selects the best candidates, using more\nand more resources.\n\nThe candidates are sampled at random from the parameter space and the\nnumber of sampled candidates is determined by ``n_candidates``.\n\nRead more in the :ref:`User guide`.\n\n.. note::\n\n This estimator is still **experimental** for now: the predictions\n and the API might change without any deprecation cycle. To use it,\n you need to explicitly import ``enable_halving_search_cv``::\n\n >>> # explicitly require this experimental feature\n >>> from sklearn.experimental import enable_halving_search_cv # noqa\n >>> # now you can import normally from model_selection\n >>> from sklearn.model_selection import HalvingRandomSearchCV", "docstring": "" }, { @@ -191901,7 +191901,7 @@ "docstring": { "type": "int", "default_value": "5", - "description": "Number of folds. Must be at least 2.\n\n.. versionchanged:: 0.22\n``n_splits`` default value changed from 3 to 5." + "description": "Number of folds. Must be at least 2.\n\n.. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5." }, "type": { "kind": "NamedType", @@ -192365,7 +192365,7 @@ "docstring": { "type": "int", "default_value": "5", - "description": "Number of folds. Must be at least 2.\n\n.. versionchanged:: 0.22\n``n_splits`` default value changed from 3 to 5." + "description": "Number of folds. Must be at least 2.\n\n.. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5." }, "type": { "kind": "NamedType", @@ -194186,7 +194186,7 @@ "docstring": { "type": "int", "default_value": "5", - "description": "Number of folds. Must be at least 2.\n\n.. versionchanged:: 0.22\n``n_splits`` default value changed from 3 to 5." + "description": "Number of folds. Must be at least 2.\n\n.. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5." }, "type": { "kind": "NamedType", @@ -194752,7 +194752,7 @@ "docstring": { "type": "int", "default_value": "5", - "description": "Number of splits. Must be at least 2.\n\n.. versionchanged:: 0.22\n``n_splits`` default value changed from 3 to 5." + "description": "Number of splits. Must be at least 2.\n\n.. versionchanged:: 0.22\n ``n_splits`` default value changed from 3 to 5." }, "type": { "kind": "NamedType", @@ -195747,7 +195747,7 @@ "docstring": { "type": "int, cross-validation generator or an iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n- None, to use the default 5-fold cross validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable that generates (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if classifier is True and ``y`` is either\nbinary or multiclass, :class:`StratifiedKFold` is used. In all other\ncases, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value changed from 3-fold to 5-fold." 
+ "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n- None, to use the default 5-fold cross validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable that generates (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if classifier is True and ``y`` is either\nbinary or multiclass, :class:`StratifiedKFold` is used. In all other\ncases, :class:`KFold` is used.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -196133,7 +196133,7 @@ "docstring": { "type": "array-like of shape (n_samples, n_features)", "default_value": "", - "description": "The data to fit.\n\n.. versionchanged:: 0.20\nX is only required to be an object with finite length or shape now" + "description": "The data to fit.\n\n.. versionchanged:: 0.20\n X is only required to be an object with finite length or shape now" }, "type": { "kind": "NamedType", @@ -197188,7 +197188,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Determine absolute sizes of training subsets and validate 'train_sizes'.\n\nExamples:\n_translate_train_sizes([0.5, 1.0], 10) -> [5, 10]\n_translate_train_sizes([5, 10], 10) -> [5, 10]", + "description": "Determine absolute sizes of training subsets and validate 'train_sizes'.\n\nExamples:\n _translate_train_sizes([0.5, 1.0], 10) -> [5, 10]\n _translate_train_sizes([5, 10], 10) -> [5, 10]", "docstring": "Determine absolute sizes of training subsets and validate 'train_sizes'.\n\nExamples:\n _translate_train_sizes([0.5, 1.0], 10) -> [5, 10]\n _translate_train_sizes([5, 10], 10) -> [5, 10]\n\nParameters\n----------\ntrain_sizes : array-like of shape (n_ticks,)\n Numbers of training examples that will be used to generate the\n learning curve. If the dtype is float, it is regarded as a\n fraction of 'n_max_training_samples', i.e. it has to be within (0, 1].\n\nn_max_training_samples : int\n Maximum number of training samples (upper bound of 'train_sizes').\n\nReturns\n-------\ntrain_sizes_abs : array of shape (n_unique_ticks,)\n Numbers of training examples that will be used to generate the\n learning curve. Note that the number of ticks might be less\n than n_ticks because duplicate entries will be removed." }, { @@ -197316,7 +197316,7 @@ "docstring": { "type": "int, cross-validation generator or an iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable that generates (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." 
+ "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable that generates (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -197397,7 +197397,7 @@ "docstring": { "type": "int or str", "default_value": "'2*n_jobs'", - "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n- None, in which case all the jobs are immediately\ncreated and spawned. Use this for lightweight and\nfast-running jobs, to avoid delays due to on-demand\nspawning of the jobs\n\n- An int, giving the exact number of total jobs that are\nspawned\n\n- A str, giving an expression as a function of n_jobs,\nas in '2*n_jobs'" + "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'" }, "type": { "kind": "UnionType", @@ -197427,7 +197427,7 @@ }, "type": { "kind": "EnumType", - "values": ["predict_log_proba", "predict", "predict_proba", "decision_function"] + "values": ["decision_function", "predict_proba", "predict_log_proba", "predict"] } } ], @@ -197547,7 +197547,7 @@ "docstring": { "type": "int, cross-validation generator or an iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- `None`, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable that generates (train, test) splits as arrays of indices.\n\nFor `int`/`None` inputs, if the estimator is a classifier and `y` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n`cv` default value if `None` changed from 3-fold to 5-fold." 
+ "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- `None`, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable that generates (train, test) splits as arrays of indices.\n\nFor `int`/`None` inputs, if the estimator is a classifier and `y` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n `cv` default value if `None` changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -197628,7 +197628,7 @@ "docstring": { "type": "int or str", "default_value": "'2*n_jobs'", - "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n- ``None``, in which case all the jobs are immediately\ncreated and spawned. Use this for lightweight and\nfast-running jobs, to avoid delays due to on-demand\nspawning of the jobs\n\n- An int, giving the exact number of total jobs that are\nspawned\n\n- A str, giving an expression as a function of n_jobs,\nas in '2*n_jobs'" + "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n - ``None``, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'" }, "type": { "kind": "UnionType", @@ -197761,7 +197761,7 @@ "docstring": { "type": "str, callable, list, tuple, or dict", "default_value": "None", - "description": "Strategy to evaluate the performance of the cross-validated model on\nthe test set.\n\nIf `scoring` represents a single score, one can use:\n\n- a single string (see :ref:`scoring_parameter`);\n- a callable (see :ref:`scoring`) that returns a single value.\n\nIf `scoring` represents multiple scores, one can use:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where the keys are the metric\nnames and the values are the metric scores;\n- a dictionary with metric names as keys and callables a values.\n\nSee :ref:`multimetric_grid_search` for an example." + "description": "Strategy to evaluate the performance of the cross-validated model on\nthe test set.\n\nIf `scoring` represents a single score, one can use:\n\n- a single string (see :ref:`scoring_parameter`);\n- a callable (see :ref:`scoring`) that returns a single value.\n\nIf `scoring` represents multiple scores, one can use:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where the keys are the metric\n names and the values are the metric scores;\n- a dictionary with metric names as keys and callables a values.\n\nSee :ref:`multimetric_grid_search` for an example." 
}, "type": { "kind": "UnionType", @@ -197799,7 +197799,7 @@ "docstring": { "type": "int, cross-validation generator or an iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`.Fold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`.Fold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -197880,7 +197880,7 @@ "docstring": { "type": "int or str", "default_value": "'2*n_jobs'", - "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n- None, in which case all the jobs are immediately\ncreated and spawned. Use this for lightweight and\nfast-running jobs, to avoid delays due to on-demand\nspawning of the jobs\n\n- An int, giving the exact number of total jobs that are\nspawned\n\n- A str, giving an expression as a function of n_jobs,\nas in '2*n_jobs'" + "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n - None, in which case all the jobs are immediately\n created and spawned. Use this for lightweight and\n fast-running jobs, to avoid delays due to on-demand\n spawning of the jobs\n\n - An int, giving the exact number of total jobs that are\n spawned\n\n - A str, giving an expression as a function of n_jobs,\n as in '2*n_jobs'" }, "type": { "kind": "UnionType", @@ -197906,7 +197906,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "Whether to include train scores.\nComputing training scores is used to get insights on how different\nparameter settings impact the overfitting/underfitting trade-off.\nHowever computing the scores on the training set can be computationally\nexpensive and is not strictly required to select the parameters that\nyield the best generalization performance.\n\n.. versionadded:: 0.19\n\n.. 
versionchanged:: 0.21\nDefault value was changed from ``True`` to ``False``" + "description": "Whether to include train scores.\nComputing training scores is used to get insights on how different\nparameter settings impact the overfitting/underfitting trade-off.\nHowever computing the scores on the training set can be computationally\nexpensive and is not strictly required to select the parameters that\nyield the best generalization performance.\n\n.. versionadded:: 0.19\n\n.. versionchanged:: 0.21\n Default value was changed from ``True`` to ``False``" }, "type": { "kind": "NamedType", @@ -198064,7 +198064,7 @@ "docstring": { "type": "int, cross-validation generator or an iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -198394,7 +198394,7 @@ "docstring": { "type": "int, cross-validation generator or an iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- `None`, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor `int`/`None` inputs, if the estimator is a classifier and `y` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n`cv` default value if `None` changed from 3-fold to 5-fold." 
+ "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- `None`, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor `int`/`None` inputs, if the estimator is a classifier and `y` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n `cv` default value if `None` changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -198672,7 +198672,7 @@ "docstring": { "type": "int, cross-validation generator or an iterable", "default_value": "None", - "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold." + "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- int, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor int/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer :ref:`User Guide ` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n ``cv`` default value if None changed from 3-fold to 5-fold." }, "type": { "kind": "UnionType", @@ -199253,7 +199253,7 @@ "docstring": { "type": "int", "default_value": "None", - "description": "The number of jobs to use for the computation: the `n_classes`\none-vs-rest problems are computed in parallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionchanged:: 0.20\n`n_jobs` default changed from 1 to None" + "description": "The number of jobs to use for the computation: the `n_classes`\none-vs-rest problems are computed in parallel.\n\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. 
versionchanged:: 0.20\n `n_jobs` default changed from 1 to None" }, "type": { "kind": "NamedType", @@ -200685,7 +200685,7 @@ "docstring": { "type": "int or None", "default_value": "(default=None)", - "description": "The number of jobs to run in parallel.\n:meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\nby the passed estimator) will be parallelized for each target.\n\nWhen individual estimators are fast to train or predict,\nusing ``n_jobs > 1`` can result in slower performance due\nto the parallelism overhead.\n\n``None`` means `1` unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all available processes / threads.\nSee :term:`Glossary ` for more details.\n\n.. versionchanged:: 0.20\n`n_jobs` default changed from `1` to `None`." + "description": "The number of jobs to run in parallel.\n:meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\nby the passed estimator) will be parallelized for each target.\n\nWhen individual estimators are fast to train or predict,\nusing ``n_jobs > 1`` can result in slower performance due\nto the parallelism overhead.\n\n``None`` means `1` unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all available processes / threads.\nSee :term:`Glossary ` for more details.\n\n.. versionchanged:: 0.20\n `n_jobs` default changed from `1` to `None`." }, "type": { "kind": "UnionType", @@ -201018,7 +201018,7 @@ "docstring": { "type": "int or None", "default_value": "(default=None)", - "description": "The number of jobs to run in parallel.\n:meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\nby the passed estimator) will be parallelized for each target.\n\nWhen individual estimators are fast to train or predict,\nusing ``n_jobs > 1`` can result in slower performance due\nto the parallelism overhead.\n\n``None`` means `1` unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all available processes / threads.\nSee :term:`Glossary ` for more details.\n\n.. versionchanged:: 0.20\n`n_jobs` default changed from `1` to `None`." + "description": "The number of jobs to run in parallel.\n:meth:`fit`, :meth:`predict` and :meth:`partial_fit` (if supported\nby the passed estimator) will be parallelized for each target.\n\nWhen individual estimators are fast to train or predict,\nusing ``n_jobs > 1`` can result in slower performance due\nto the parallelism overhead.\n\n``None`` means `1` unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all available processes / threads.\nSee :term:`Glossary ` for more details.\n\n.. versionchanged:: 0.20\n `n_jobs` default changed from `1` to `None`." }, "type": { "kind": "UnionType", @@ -202501,7 +202501,7 @@ "docstring": { "type": "int or array-like of shape (n_features,)", "default_value": "None", - "description": "Minimum number of categories per feature.\n\n- integer: Sets the minimum number of categories per feature to\n`n_categories` for each features.\n- array-like: shape (n_features,) where `n_categories[i]` holds the\nminimum number of categories for the ith column of the input.\n- None (default): Determines the number of categories automatically\nfrom the training data.\n\n.. 
versionadded:: 0.24" + "description": "Minimum number of categories per feature.\n\n- integer: Sets the minimum number of categories per feature to\n `n_categories` for each features.\n- array-like: shape (n_features,) where `n_categories[i]` holds the\n minimum number of categories for the ith column of the input.\n- None (default): Determines the number of categories automatically\n from the training data.\n\n.. versionadded:: 0.24" }, "type": { "kind": "UnionType", @@ -203402,7 +203402,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Gaussian Naive Bayes (GaussianNB).\n\nCan perform online updates to model parameters via :meth:`partial_fit`.\nFor details on algorithm used to update feature means and variance online,\nsee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\nhttp://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nRead more in the :ref:`User Guide `.", + "description": "Gaussian Naive Bayes (GaussianNB).\n\nCan perform online updates to model parameters via :meth:`partial_fit`.\nFor details on algorithm used to update feature means and variance online,\nsee Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:\n\n http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -203758,7 +203758,7 @@ "docstring": { "type": "array-like of shape (n_samples,)", "default_value": "None", - "description": "Weights applied to individual samples (1. for unweighted).\n\n.. versionadded:: 0.17\nGaussian Naive Bayes supports fitting with *sample_weight*." + "description": "Weights applied to individual samples (1. for unweighted).\n\n.. versionadded:: 0.17\n Gaussian Naive Bayes supports fitting with *sample_weight*." }, "type": { "kind": "NamedType", @@ -205211,7 +205211,7 @@ }, "type": { "kind": "EnumType", - "values": ["distance", "connectivity"] + "values": ["connectivity", "distance"] } } ], @@ -205727,7 +205727,7 @@ }, "type": { "kind": "EnumType", - "values": ["distance", "connectivity"] + "values": ["connectivity", "distance"] } }, { @@ -205857,7 +205857,7 @@ }, "type": { "kind": "EnumType", - "values": ["distance", "uniform"] + "values": ["uniform", "distance"] } } ], @@ -205894,7 +205894,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Return whether the graph's non-zero entries are sorted by data.\n\nThe non-zero entries are stored in graph.data and graph.indices.\nFor each row (or sample), the non-zero entries can be either:\n- sorted by indices, as after graph.sort_indices();\n- sorted by data, as after _check_precomputed(graph);\n- not sorted.", + "description": "Return whether the graph's non-zero entries are sorted by data.\n\nThe non-zero entries are stored in graph.data and graph.indices.\nFor each row (or sample), the non-zero entries can be either:\n - sorted by indices, as after graph.sort_indices();\n - sorted by data, as after _check_precomputed(graph);\n - not sorted.", "docstring": "Return whether the graph's non-zero entries are sorted by data.\n\nThe non-zero entries are stored in graph.data and graph.indices.\nFor each row (or sample), the non-zero entries can be either:\n - sorted by indices, as after graph.sort_indices();\n - sorted by data, as after _check_precomputed(graph);\n - not sorted.\n\nParameters\n----------\ngraph : sparse matrix of shape (n_samples, n_samples)\n Neighbors graph as given by `kneighbors_graph` or\n `radius_neighbors_graph`. 
Matrix should be of format CSR format.\n\nReturns\n-------\nres : bool\n Whether input graph is sorted by data." }, { @@ -206182,14 +206182,14 @@ "docstring": { "type": "{'uniform', 'distance'} or callable", "default_value": "'uniform'", - "description": "Weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. All points in each neighborhood\nare weighted equally.\n- 'distance' : weight points by the inverse of their distance.\nin this case, closer neighbors of a query point will have a\ngreater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\narray of distances, and returns an array of the same shape\ncontaining the weights." + "description": "Weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. All points in each neighborhood\n are weighted equally.\n- 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["distance", "uniform"] + "values": ["uniform", "distance"] }, { "kind": "NamedType", @@ -206208,11 +206208,11 @@ "docstring": { "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", "default_value": "'auto'", - "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\nbased on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." }, "type": { "kind": "EnumType", - "values": ["ball_tree", "auto", "kd_tree", "brute"] + "values": ["brute", "kd_tree", "ball_tree", "auto"] } }, { @@ -206575,14 +206575,14 @@ "docstring": { "type": "{'uniform', 'distance'} or callable", "default_value": "'uniform'", - "description": "Weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. All points in each neighborhood\nare weighted equally.\n- 'distance' : weight points by the inverse of their distance.\nin this case, closer neighbors of a query point will have a\ngreater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\narray of distances, and returns an array of the same shape\ncontaining the weights.\n\nUniform weights are used by default." + "description": "Weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. 
All points in each neighborhood\n are weighted equally.\n- 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\nUniform weights are used by default." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["distance", "uniform"] + "values": ["uniform", "distance"] }, { "kind": "NamedType", @@ -206601,11 +206601,11 @@ "docstring": { "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", "default_value": "'auto'", - "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\nbased on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." }, "type": { "kind": "EnumType", - "values": ["ball_tree", "auto", "kd_tree", "brute"] + "values": ["brute", "kd_tree", "ball_tree", "auto"] } }, { @@ -206678,7 +206678,7 @@ "docstring": { "type": "{manual label, 'most_frequent'}", "default_value": "None", - "description": "Label for outlier samples (samples with no neighbors in given radius).\n\n- manual label: str or int label (should be the same type as y)\nor list of manual labels if multi-output is used.\n- 'most_frequent' : assign the most frequent label of y to outliers.\n- None : when any outlier is detected, ValueError will be raised." + "description": "Label for outlier samples (samples with no neighbors in given radius).\n\n- manual label: str or int label (should be the same type as y)\n or list of manual labels if multi-output is used.\n- 'most_frequent' : assign the most frequent label of y to outliers.\n- None : when any outlier is detected, ValueError will be raised." }, "type": { "kind": "EnumType", @@ -206729,7 +206729,7 @@ "docstring": { "type": "dict", "default_value": "", - "description": "Additional keyword arguments passed to the constructor.\n\n.. deprecated:: 1.0\nThe RadiusNeighborsClassifier class will not longer accept extra\nkeyword parameters in 1.2 since they are unused." + "description": "Additional keyword arguments passed to the constructor.\n\n.. deprecated:: 1.0\n The RadiusNeighborsClassifier class will not longer accept extra\n keyword parameters in 1.2 since they are unused." 
}, "type": { "kind": "NamedType", @@ -207101,11 +207101,11 @@ "docstring": { "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", "default_value": "'auto'", - "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\nbased on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." }, "type": { "kind": "EnumType", - "values": ["ball_tree", "auto", "kd_tree", "brute"] + "values": ["brute", "kd_tree", "ball_tree", "auto"] } }, { @@ -207135,7 +207135,7 @@ "docstring": { "type": "str or callable", "default_value": "'minkowski'", - "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nDistance matrices are not supported.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." + "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nDistance matrices are not supported.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." 
}, "type": { "kind": "UnionType", @@ -207475,11 +207475,11 @@ "docstring": { "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", "default_value": "'auto'", - "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\nbased on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." }, "type": { "kind": "EnumType", - "values": ["ball_tree", "auto", "kd_tree", "brute"] + "values": ["brute", "kd_tree", "ball_tree", "auto"] } }, { @@ -207509,7 +207509,7 @@ "docstring": { "type": "str or callable", "default_value": "'minkowski'", - "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nDistance matrices are not supported.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." + "description": "Metric to use for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nDistance matrices are not supported.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics." 
}, "type": { "kind": "UnionType", @@ -207972,7 +207972,7 @@ }, "type": { "kind": "EnumType", - "values": ["distance", "connectivity"] + "values": ["connectivity", "distance"] } }, { @@ -208139,7 +208139,7 @@ }, "type": { "kind": "EnumType", - "values": ["distance", "connectivity"] + "values": ["connectivity", "distance"] } }, { @@ -208311,7 +208311,7 @@ }, "type": { "kind": "EnumType", - "values": ["cosine", "linear", "tophat", "gaussian", "exponential", "epanechnikov"] + "values": ["linear", "gaussian", "exponential", "tophat", "epanechnikov", "cosine"] } }, { @@ -208809,11 +208809,11 @@ "docstring": { "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", "default_value": "'auto'", - "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\nbased on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." }, "type": { "kind": "EnumType", - "values": ["ball_tree", "auto", "kd_tree", "brute"] + "values": ["brute", "kd_tree", "ball_tree", "auto"] } }, { @@ -208843,7 +208843,7 @@ "docstring": { "type": "str or callable", "default_value": "'minkowski'", - "description": "The metric is used for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square. X may be a sparse matrix, in which case only \"nonzero\"\nelements may be considered neighbors.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics:\nhttps://docs.scipy.org/doc/scipy/reference/spatial.distance.html." + "description": "The metric is used for distance computation. Any metric from scikit-learn\nor scipy.spatial.distance can be used.\n\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square. X may be a sparse matrix, in which case only \"nonzero\"\nelements may be considered neighbors.\n\nIf metric is a callable function, it is called on each\npair of instances (rows) and the resulting value recorded. The callable\nshould take two arrays as input and return one value indicating the\ndistance between them. 
This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nValid values for metric are:\n\n- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',\n 'manhattan']\n\n- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',\n 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',\n 'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',\n 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',\n 'yule']\n\nSee the documentation for scipy.spatial.distance for details on these\nmetrics:\nhttps://docs.scipy.org/doc/scipy/reference/spatial.distance.html." }, "type": { "kind": "UnionType", @@ -208903,7 +208903,7 @@ "docstring": { "type": "'auto' or float", "default_value": "'auto'", - "description": "The amount of contamination of the data set, i.e. the proportion\nof outliers in the data set. When fitting this is used to define the\nthreshold on the scores of the samples.\n\n- if 'auto', the threshold is determined as in the\noriginal paper,\n- if a float, the contamination should be in the range (0, 0.5].\n\n.. versionchanged:: 0.22\nThe default value of ``contamination`` changed from 0.1\nto ``'auto'``." + "description": "The amount of contamination of the data set, i.e. the proportion\nof outliers in the data set. When fitting this is used to define the\nthreshold on the scores of the samples.\n\n- if 'auto', the threshold is determined as in the\n original paper,\n- if a float, the contamination should be in the range (0, 0.5].\n\n.. versionchanged:: 0.22\n The default value of ``contamination`` changed from 0.1\n to ``'auto'``." }, "type": { "kind": "UnionType", @@ -209491,14 +209491,14 @@ "docstring": { "type": "{'auto', 'pca', 'lda', 'identity', 'random'} or ndarray of shape (n_features_a, n_features_b)", "default_value": "'auto'", - "description": "Initialization of the linear transformation. Possible options are\n`'auto'`, `'pca'`, `'lda'`, `'identity'`, `'random'`, and a numpy\narray of shape `(n_features_a, n_features_b)`.\n\n- `'auto'`\nDepending on `n_components`, the most reasonable initialization\nwill be chosen. If `n_components <= n_classes` we use `'lda'`, as\nit uses labels information. If not, but\n`n_components < min(n_features, n_samples)`, we use `'pca'`, as\nit projects data in meaningful directions (those of higher\nvariance). Otherwise, we just use `'identity'`.\n\n- `'pca'`\n`n_components` principal components of the inputs passed\nto :meth:`fit` will be used to initialize the transformation.\n(See :class:`~sklearn.decomposition.PCA`)\n\n- `'lda'`\n`min(n_components, n_classes)` most discriminative\ncomponents of the inputs passed to :meth:`fit` will be used to\ninitialize the transformation. (If `n_components > n_classes`,\nthe rest of the components will be zero.) (See\n:class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)\n\n- `'identity'`\nIf `n_components` is strictly smaller than the\ndimensionality of the inputs passed to :meth:`fit`, the identity\nmatrix will be truncated to the first `n_components` rows.\n\n- `'random'`\nThe initial transformation will be a random array of shape\n`(n_components, n_features)`. Each value is sampled from the\nstandard normal distribution.\n\n- numpy array\n`n_features_b` must match the dimensionality of the inputs passed\nto :meth:`fit` and n_features_a must be less than or equal to that.\nIf `n_components` is not `None`, `n_features_a` must match it." + "description": "Initialization of the linear transformation. 
Possible options are\n`'auto'`, `'pca'`, `'lda'`, `'identity'`, `'random'`, and a numpy\narray of shape `(n_features_a, n_features_b)`.\n\n- `'auto'`\n Depending on `n_components`, the most reasonable initialization\n will be chosen. If `n_components <= n_classes` we use `'lda'`, as\n it uses labels information. If not, but\n `n_components < min(n_features, n_samples)`, we use `'pca'`, as\n it projects data in meaningful directions (those of higher\n variance). Otherwise, we just use `'identity'`.\n\n- `'pca'`\n `n_components` principal components of the inputs passed\n to :meth:`fit` will be used to initialize the transformation.\n (See :class:`~sklearn.decomposition.PCA`)\n\n- `'lda'`\n `min(n_components, n_classes)` most discriminative\n components of the inputs passed to :meth:`fit` will be used to\n initialize the transformation. (If `n_components > n_classes`,\n the rest of the components will be zero.) (See\n :class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`)\n\n- `'identity'`\n If `n_components` is strictly smaller than the\n dimensionality of the inputs passed to :meth:`fit`, the identity\n matrix will be truncated to the first `n_components` rows.\n\n- `'random'`\n The initial transformation will be a random array of shape\n `(n_components, n_features)`. Each value is sampled from the\n standard normal distribution.\n\n- numpy array\n `n_features_b` must match the dimensionality of the inputs passed\n to :meth:`fit` and n_features_a must be less than or equal to that.\n If `n_components` is not `None`, `n_features_a` must match it." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["pca", "random", "identity", "auto", "lda"] + "values": ["pca", "lda", "auto", "identity", "random"] }, { "kind": "NamedType", @@ -210071,7 +210071,7 @@ "docstring": { "type": "str or callable", "default_value": "\"euclidean\"", - "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string or callable, it must be one of\nthe options allowed by\n:func:`~sklearn.metrics.pairwise_distances` for its metric\nparameter. The centroids for the samples corresponding to each class is\nthe point from which the sum of the distances (according to the metric)\nof all samples that belong to that particular class are minimized.\nIf the `\"manhattan\"` metric is provided, this centroid is the median\nand for all other metrics, the centroid is now set to be the mean.\n\n.. versionchanged:: 0.19\n`metric='precomputed'` was deprecated and now raises an error" + "description": "The metric to use when calculating distance between instances in a\nfeature array. If metric is a string or callable, it must be one of\nthe options allowed by\n:func:`~sklearn.metrics.pairwise_distances` for its metric\nparameter. The centroids for the samples corresponding to each class is\nthe point from which the sum of the distances (according to the metric)\nof all samples that belong to that particular class are minimized.\nIf the `\"manhattan\"` metric is provided, this centroid is the median\nand for all other metrics, the centroid is now set to be the mean.\n\n.. versionchanged:: 0.19\n `metric='precomputed'` was deprecated and now raises an error" }, "type": { "kind": "UnionType", @@ -210281,14 +210281,14 @@ "docstring": { "type": "{'uniform', 'distance'} or callable", "default_value": "'uniform'", - "description": "Weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. 
All points in each neighborhood\nare weighted equally.\n- 'distance' : weight points by the inverse of their distance.\nin this case, closer neighbors of a query point will have a\ngreater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\narray of distances, and returns an array of the same shape\ncontaining the weights.\n\nUniform weights are used by default." + "description": "Weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. All points in each neighborhood\n are weighted equally.\n- 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\nUniform weights are used by default." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["distance", "uniform"] + "values": ["uniform", "distance"] }, { "kind": "NamedType", @@ -210307,11 +210307,11 @@ "docstring": { "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", "default_value": "'auto'", - "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\nbased on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." }, "type": { "kind": "EnumType", - "values": ["ball_tree", "auto", "kd_tree", "brute"] + "values": ["brute", "kd_tree", "ball_tree", "auto"] } }, { @@ -210621,14 +210621,14 @@ "docstring": { "type": "{'uniform', 'distance'} or callable", "default_value": "'uniform'", - "description": "Weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. All points in each neighborhood\nare weighted equally.\n- 'distance' : weight points by the inverse of their distance.\nin this case, closer neighbors of a query point will have a\ngreater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\narray of distances, and returns an array of the same shape\ncontaining the weights.\n\nUniform weights are used by default." + "description": "Weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. All points in each neighborhood\n are weighted equally.\n- 'distance' : weight points by the inverse of their distance.\n in this case, closer neighbors of a query point will have a\n greater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\n array of distances, and returns an array of the same shape\n containing the weights.\n\nUniform weights are used by default." 
}, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["distance", "uniform"] + "values": ["uniform", "distance"] }, { "kind": "NamedType", @@ -210647,11 +210647,11 @@ "docstring": { "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", "default_value": "'auto'", - "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\nbased on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." }, "type": { "kind": "EnumType", - "values": ["ball_tree", "auto", "kd_tree", "brute"] + "values": ["brute", "kd_tree", "ball_tree", "auto"] } }, { @@ -210951,11 +210951,11 @@ "docstring": { "type": "{'auto', 'ball_tree', 'kd_tree', 'brute'}", "default_value": "'auto'", - "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\nbased on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." + "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\n based on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force." 
}, "type": { "kind": "EnumType", - "values": ["ball_tree", "auto", "kd_tree", "brute"] + "values": ["brute", "kd_tree", "ball_tree", "auto"] } }, { @@ -213596,11 +213596,11 @@ "docstring": { "type": "{'identity', 'logistic', 'tanh', 'relu'}", "default_value": "'relu'", - "description": "Activation function for the hidden layer.\n\n- 'identity', no-op activation, useful to implement linear bottleneck,\nreturns f(x) = x\n\n- 'logistic', the logistic sigmoid function,\nreturns f(x) = 1 / (1 + exp(-x)).\n\n- 'tanh', the hyperbolic tan function,\nreturns f(x) = tanh(x).\n\n- 'relu', the rectified linear unit function,\nreturns f(x) = max(0, x)" + "description": "Activation function for the hidden layer.\n\n- 'identity', no-op activation, useful to implement linear bottleneck,\n returns f(x) = x\n\n- 'logistic', the logistic sigmoid function,\n returns f(x) = 1 / (1 + exp(-x)).\n\n- 'tanh', the hyperbolic tan function,\n returns f(x) = tanh(x).\n\n- 'relu', the rectified linear unit function,\n returns f(x) = max(0, x)" }, "type": { "kind": "EnumType", - "values": ["relu", "tanh", "identity", "logistic"] + "values": ["identity", "logistic", "tanh", "relu"] } }, { @@ -213613,11 +213613,11 @@ "docstring": { "type": "{'lbfgs', 'sgd', 'adam'}", "default_value": "'adam'", - "description": "The solver for weight optimization.\n\n- 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n- 'sgd' refers to stochastic gradient descent.\n\n- 'adam' refers to a stochastic gradient-based optimizer proposed\nby Kingma, Diederik, and Jimmy Ba\n\nNote: The default solver 'adam' works pretty well on relatively\nlarge datasets (with thousands of training samples or more) in terms of\nboth training time and validation score.\nFor small datasets, however, 'lbfgs' can converge faster and perform\nbetter." + "description": "The solver for weight optimization.\n\n- 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n- 'sgd' refers to stochastic gradient descent.\n\n- 'adam' refers to a stochastic gradient-based optimizer proposed\n by Kingma, Diederik, and Jimmy Ba\n\nNote: The default solver 'adam' works pretty well on relatively\nlarge datasets (with thousands of training samples or more) in terms of\nboth training time and validation score.\nFor small datasets, however, 'lbfgs' can converge faster and perform\nbetter." }, "type": { "kind": "EnumType", - "values": ["sgd", "adam", "lbfgs"] + "values": ["adam", "lbfgs", "sgd"] } }, { @@ -213664,11 +213664,11 @@ "docstring": { "type": "{'constant', 'invscaling', 'adaptive'}", "default_value": "'constant'", - "description": "Learning rate schedule for weight updates.\n\n- 'constant' is a constant learning rate given by\n'learning_rate_init'.\n\n- 'invscaling' gradually decreases the learning rate at each\ntime step 't' using an inverse scaling exponent of 'power_t'.\neffective_learning_rate = learning_rate_init / pow(t, power_t)\n\n- 'adaptive' keeps the learning rate constant to\n'learning_rate_init' as long as training loss keeps decreasing.\nEach time two consecutive epochs fail to decrease training loss by at\nleast tol, or fail to increase validation score by at least tol if\n'early_stopping' is on, the current learning rate is divided by 5.\n\nOnly used when ``solver='sgd'``." 
+ "description": "Learning rate schedule for weight updates.\n\n- 'constant' is a constant learning rate given by\n 'learning_rate_init'.\n\n- 'invscaling' gradually decreases the learning rate at each\n time step 't' using an inverse scaling exponent of 'power_t'.\n effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n- 'adaptive' keeps the learning rate constant to\n 'learning_rate_init' as long as training loss keeps decreasing.\n Each time two consecutive epochs fail to decrease training loss by at\n least tol, or fail to increase validation score by at least tol if\n 'early_stopping' is on, the current learning rate is divided by 5.\n\nOnly used when ``solver='sgd'``." }, "type": { "kind": "EnumType", - "values": ["adaptive", "constant", "invscaling"] + "values": ["constant", "invscaling", "adaptive"] } }, { @@ -214379,11 +214379,11 @@ "docstring": { "type": "{'identity', 'logistic', 'tanh', 'relu'}", "default_value": "'relu'", - "description": "Activation function for the hidden layer.\n\n- 'identity', no-op activation, useful to implement linear bottleneck,\nreturns f(x) = x\n\n- 'logistic', the logistic sigmoid function,\nreturns f(x) = 1 / (1 + exp(-x)).\n\n- 'tanh', the hyperbolic tan function,\nreturns f(x) = tanh(x).\n\n- 'relu', the rectified linear unit function,\nreturns f(x) = max(0, x)" + "description": "Activation function for the hidden layer.\n\n- 'identity', no-op activation, useful to implement linear bottleneck,\n returns f(x) = x\n\n- 'logistic', the logistic sigmoid function,\n returns f(x) = 1 / (1 + exp(-x)).\n\n- 'tanh', the hyperbolic tan function,\n returns f(x) = tanh(x).\n\n- 'relu', the rectified linear unit function,\n returns f(x) = max(0, x)" }, "type": { "kind": "EnumType", - "values": ["relu", "tanh", "identity", "logistic"] + "values": ["identity", "logistic", "tanh", "relu"] } }, { @@ -214396,11 +214396,11 @@ "docstring": { "type": "{'lbfgs', 'sgd', 'adam'}", "default_value": "'adam'", - "description": "The solver for weight optimization.\n\n- 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n- 'sgd' refers to stochastic gradient descent.\n\n- 'adam' refers to a stochastic gradient-based optimizer proposed by\nKingma, Diederik, and Jimmy Ba\n\nNote: The default solver 'adam' works pretty well on relatively\nlarge datasets (with thousands of training samples or more) in terms of\nboth training time and validation score.\nFor small datasets, however, 'lbfgs' can converge faster and perform\nbetter." + "description": "The solver for weight optimization.\n\n- 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n- 'sgd' refers to stochastic gradient descent.\n\n- 'adam' refers to a stochastic gradient-based optimizer proposed by\n Kingma, Diederik, and Jimmy Ba\n\nNote: The default solver 'adam' works pretty well on relatively\nlarge datasets (with thousands of training samples or more) in terms of\nboth training time and validation score.\nFor small datasets, however, 'lbfgs' can converge faster and perform\nbetter." 
}, "type": { "kind": "EnumType", - "values": ["sgd", "adam", "lbfgs"] + "values": ["adam", "lbfgs", "sgd"] } }, { @@ -214447,11 +214447,11 @@ "docstring": { "type": "{'constant', 'invscaling', 'adaptive'}", "default_value": "'constant'", - "description": "Learning rate schedule for weight updates.\n\n- 'constant' is a constant learning rate given by\n'learning_rate_init'.\n\n- 'invscaling' gradually decreases the learning rate ``learning_rate_``\nat each time step 't' using an inverse scaling exponent of 'power_t'.\neffective_learning_rate = learning_rate_init / pow(t, power_t)\n\n- 'adaptive' keeps the learning rate constant to\n'learning_rate_init' as long as training loss keeps decreasing.\nEach time two consecutive epochs fail to decrease training loss by at\nleast tol, or fail to increase validation score by at least tol if\n'early_stopping' is on, the current learning rate is divided by 5.\n\nOnly used when solver='sgd'." + "description": "Learning rate schedule for weight updates.\n\n- 'constant' is a constant learning rate given by\n 'learning_rate_init'.\n\n- 'invscaling' gradually decreases the learning rate ``learning_rate_``\n at each time step 't' using an inverse scaling exponent of 'power_t'.\n effective_learning_rate = learning_rate_init / pow(t, power_t)\n\n- 'adaptive' keeps the learning rate constant to\n 'learning_rate_init' as long as training loss keeps decreasing.\n Each time two consecutive epochs fail to decrease training loss by at\n least tol, or fail to increase validation score by at least tol if\n 'early_stopping' is on, the current learning rate is divided by 5.\n\nOnly used when solver='sgd'." }, "type": { "kind": "EnumType", - "values": ["adaptive", "constant", "invscaling"] + "values": ["constant", "invscaling", "adaptive"] } }, { @@ -216111,11 +216111,11 @@ "docstring": { "type": "{'constant', 'adaptive', 'invscaling'}", "default_value": "'constant'", - "description": "Learning rate schedule for weight updates.\n\n-'constant', is a constant learning rate given by\n'learning_rate_init'.\n\n-'invscaling' gradually decreases the learning rate 'learning_rate_' at\neach time step 't' using an inverse scaling exponent of 'power_t'.\nlearning_rate_ = learning_rate_init / pow(t, power_t)\n\n-'adaptive', keeps the learning rate constant to\n'learning_rate_init' as long as the training keeps decreasing.\nEach time 2 consecutive epochs fail to decrease the training loss by\ntol, or fail to increase validation score by tol if 'early_stopping'\nis on, the current learning rate is divided by 5." + "description": "Learning rate schedule for weight updates.\n\n-'constant', is a constant learning rate given by\n 'learning_rate_init'.\n\n-'invscaling' gradually decreases the learning rate 'learning_rate_' at\n each time step 't' using an inverse scaling exponent of 'power_t'.\n learning_rate_ = learning_rate_init / pow(t, power_t)\n\n-'adaptive', keeps the learning rate constant to\n 'learning_rate_init' as long as the training keeps decreasing.\n Each time 2 consecutive epochs fail to decrease the training loss by\n tol, or fail to increase validation score by tol if 'early_stopping'\n is on, the current learning rate is divided by 5." }, "type": { "kind": "EnumType", - "values": ["adaptive", "constant", "invscaling"] + "values": ["constant", "invscaling", "adaptive"] } }, { @@ -216358,7 +216358,7 @@ "docstring": { "type": "list of (str, transformer) tuples", "default_value": "", - "description": "List of transformer objects to be applied to the data. 
The first\nhalf of each tuple is the name of the transformer. The transformer can\nbe 'drop' for it to be ignored or can be 'passthrough' for features to\nbe passed unchanged.\n\n.. versionadded:: 1.1\nAdded the option `\"passthrough\"`.\n\n.. versionchanged:: 0.22\nDeprecated `None` as a transformer in favor of 'drop'." + "description": "List of transformer objects to be applied to the data. The first\nhalf of each tuple is the name of the transformer. The transformer can\nbe 'drop' for it to be ignored or can be 'passthrough' for features to\nbe passed unchanged.\n\n.. versionadded:: 1.1\n Added the option `\"passthrough\"`.\n\n.. versionchanged:: 0.22\n Deprecated `None` as a transformer in favor of 'drop'." }, "type": { "kind": "NamedType", @@ -216375,7 +216375,7 @@ "docstring": { "type": "int", "default_value": "None", - "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionchanged:: v0.20\n`n_jobs` default changed from 1 to None" + "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None" }, "type": { "kind": "NamedType", @@ -219190,7 +219190,7 @@ "docstring": { "type": "int", "default_value": "None", - "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionchanged:: v0.20\n`n_jobs` default changed from 1 to None" + "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary `\nfor more details.\n\n.. versionchanged:: v0.20\n `n_jobs` default changed from 1 to None" }, "type": { "kind": "NamedType", @@ -219473,7 +219473,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Center an arbitrary kernel matrix :math:`K`.\n\nLet define a kernel :math:`K` such that:\n\n.. math::\nK(X, Y) = \\phi(X) . \\phi(Y)^{T}\n\n:math:`\\phi(X)` is a function mapping of rows of :math:`X` to a\nHilbert space and :math:`K` is of shape `(n_samples, n_samples)`.\n\nThis class allows to compute :math:`\\tilde{K}(X, Y)` such that:\n\n.. math::\n\\tilde{K(X, Y)} = \\tilde{\\phi}(X) . \\tilde{\\phi}(Y)^{T}\n\n:math:`\\tilde{\\phi}(X)` is the centered mapped data in the Hilbert\nspace.\n\n`KernelCenterer` centers the features without explicitly computing the\nmapping :math:`\\phi(\\cdot)`. Working with centered kernels is sometime\nexpected when dealing with algebra computation such as eigendecomposition\nfor :class:`~sklearn.decomposition.KernelPCA` for instance.\n\nRead more in the :ref:`User Guide `.", + "description": "Center an arbitrary kernel matrix :math:`K`.\n\nLet define a kernel :math:`K` such that:\n\n.. math::\n K(X, Y) = \\phi(X) . \\phi(Y)^{T}\n\n:math:`\\phi(X)` is a function mapping of rows of :math:`X` to a\nHilbert space and :math:`K` is of shape `(n_samples, n_samples)`.\n\nThis class allows to compute :math:`\\tilde{K}(X, Y)` such that:\n\n.. math::\n \\tilde{K(X, Y)} = \\tilde{\\phi}(X) . 
\\tilde{\\phi}(Y)^{T}\n\n:math:`\\tilde{\\phi}(X)` is the centered mapped data in the Hilbert\nspace.\n\n`KernelCenterer` centers the features without explicitly computing the\nmapping :math:`\\phi(\\cdot)`. Working with centered kernels is sometime\nexpected when dealing with algebra computation such as eigendecomposition\nfor :class:`~sklearn.decomposition.KernelPCA` for instance.\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -220071,7 +220071,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, e.g. between\nzero and one.\n\nThe transformation is given by::\n\nX_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\nX_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.", + "description": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, e.g. between\nzero and one.\n\nThe transformation is given by::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -220372,7 +220372,7 @@ }, "type": { "kind": "EnumType", - "values": ["l2", "l1", "max"] + "values": ["max", "l1", "l2"] } }, { @@ -221262,7 +221262,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Apply the inverse power transformation using the fitted lambdas.\n\nThe inverse of the Box-Cox transformation is given by::\n\nif lambda_ == 0:\nX = exp(X_trans)\nelse:\nX = (X_trans * lambda_ + 1) ** (1 / lambda_)\n\nThe inverse of the Yeo-Johnson transformation is given by::\n\nif X >= 0 and lambda_ == 0:\nX = exp(X_trans) - 1\nelif X >= 0 and lambda_ != 0:\nX = (X_trans * lambda_ + 1) ** (1 / lambda_) - 1\nelif X < 0 and lambda_ != 2:\nX = 1 - (-(2 - lambda_) * X_trans + 1) ** (1 / (2 - lambda_))\nelif X < 0 and lambda_ == 2:\nX = 1 - exp(-X_trans)", + "description": "Apply the inverse power transformation using the fitted lambdas.\n\nThe inverse of the Box-Cox transformation is given by::\n\n if lambda_ == 0:\n X = exp(X_trans)\n else:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_)\n\nThe inverse of the Yeo-Johnson transformation is given by::\n\n if X >= 0 and lambda_ == 0:\n X = exp(X_trans) - 1\n elif X >= 0 and lambda_ != 0:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_) - 1\n elif X < 0 and lambda_ != 2:\n X = 1 - (-(2 - lambda_) * X_trans + 1) ** (1 / (2 - lambda_))\n elif X < 0 and lambda_ == 2:\n X = 1 - exp(-X_trans)", "docstring": "Apply the inverse power transformation using the fitted lambdas.\n\nThe inverse of the Box-Cox transformation is given by::\n\n if lambda_ == 0:\n X = exp(X_trans)\n else:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_)\n\nThe inverse of the Yeo-Johnson transformation is given by::\n\n if X >= 0 and lambda_ == 0:\n X = exp(X_trans) - 1\n elif X >= 0 and lambda_ != 0:\n X = (X_trans * lambda_ + 1) ** (1 / lambda_) - 1\n elif X < 0 and lambda_ != 2:\n X = 1 - (-(2 - lambda_) * X_trans + 1) ** (1 / (2 - lambda_))\n elif X < 0 
and lambda_ == 2:\n X = 1 - exp(-X_trans)\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The transformed data.\n\nReturns\n-------\nX : ndarray of shape (n_samples, n_features)\n The original data." }, { @@ -221360,7 +221360,7 @@ }, "type": { "kind": "EnumType", - "values": ["normal", "uniform"] + "values": ["uniform", "normal"] } }, { @@ -222382,7 +222382,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Standardize features by removing the mean and scaling to unit variance.\n\nThe standard score of a sample `x` is calculated as:\n\nz = (x - u) / s\n\nwhere `u` is the mean of the training samples or zero if `with_mean=False`,\nand `s` is the standard deviation of the training samples or one if\n`with_std=False`.\n\nCentering and scaling happen independently on each feature by computing\nthe relevant statistics on the samples in the training set. Mean and\nstandard deviation are then stored to be used on later data using\n:meth:`transform`.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators: they might behave badly if the\nindividual features do not more or less look like standard normally\ndistributed data (e.g. Gaussian with 0 mean and unit variance).\n\nFor instance many elements used in the objective function of\na learning algorithm (such as the RBF kernel of Support Vector\nMachines or the L1 and L2 regularizers of linear models) assume that\nall features are centered around 0 and have variance in the same\norder. If a feature has a variance that is orders of magnitude larger\nthan others, it might dominate the objective function and make the\nestimator unable to learn from other features correctly as expected.\n\nThis scaler can also be applied to sparse CSR or CSC matrices by passing\n`with_mean=False` to avoid breaking the sparsity structure of the data.\n\nRead more in the :ref:`User Guide `.", + "description": "Standardize features by removing the mean and scaling to unit variance.\n\nThe standard score of a sample `x` is calculated as:\n\n z = (x - u) / s\n\nwhere `u` is the mean of the training samples or zero if `with_mean=False`,\nand `s` is the standard deviation of the training samples or one if\n`with_std=False`.\n\nCentering and scaling happen independently on each feature by computing\nthe relevant statistics on the samples in the training set. Mean and\nstandard deviation are then stored to be used on later data using\n:meth:`transform`.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators: they might behave badly if the\nindividual features do not more or less look like standard normally\ndistributed data (e.g. Gaussian with 0 mean and unit variance).\n\nFor instance many elements used in the objective function of\na learning algorithm (such as the RBF kernel of Support Vector\nMachines or the L1 and L2 regularizers of linear models) assume that\nall features are centered around 0 and have variance in the same\norder. 
If a feature has a variance that is orders of magnitude larger\nthan others, it might dominate the objective function and make the\nestimator unable to learn from other features correctly as expected.\n\nThis scaler can also be applied to sparse CSR or CSC matrices by passing\n`with_mean=False` to avoid breaking the sparsity structure of the data.\n\nRead more in the :ref:`User Guide `.", "docstring": "" }, { @@ -222512,7 +222512,7 @@ "docstring": { "type": "array-like of shape (n_samples,)", "default_value": "None", - "description": "Individual weights for each sample.\n\n.. versionadded:: 0.24\nparameter *sample_weight* support to StandardScaler." + "description": "Individual weights for each sample.\n\n.. versionadded:: 0.24\n parameter *sample_weight* support to StandardScaler." }, "type": { "kind": "NamedType", @@ -222669,7 +222669,7 @@ "docstring": { "type": "array-like of shape (n_samples,)", "default_value": "None", - "description": "Individual weights for each sample.\n\n.. versionadded:: 0.24\nparameter *sample_weight* support to StandardScaler." + "description": "Individual weights for each sample.\n\n.. versionadded:: 0.24\n parameter *sample_weight* support to StandardScaler." }, "type": { "kind": "NamedType", @@ -223143,7 +223143,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.preprocessing"], - "description": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, i.e. between\nzero and one.\n\nThe transformation is given by (when ``axis=0``)::\n\nX_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\nX_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThe transformation is calculated as (when ``axis=0``)::\n\nX_scaled = scale * X + min - X.min(axis=0) * scale\nwhere scale = (max - min) / (X.max(axis=0) - X.min(axis=0))\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n*minmax_scale* function interface\nto :class:`~sklearn.preprocessing.MinMaxScaler`.", + "description": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, i.e. between\nzero and one.\n\nThe transformation is given by (when ``axis=0``)::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThe transformation is calculated as (when ``axis=0``)::\n\n X_scaled = scale * X + min - X.min(axis=0) * scale\n where scale = (max - min) / (X.max(axis=0) - X.min(axis=0))\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n *minmax_scale* function interface\n to :class:`~sklearn.preprocessing.MinMaxScaler`.", "docstring": "Transform features by scaling each feature to a given range.\n\nThis estimator scales and translates each feature individually such\nthat it is in the given range on the training set, i.e. 
between\nzero and one.\n\nThe transformation is given by (when ``axis=0``)::\n\n X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))\n X_scaled = X_std * (max - min) + min\n\nwhere min, max = feature_range.\n\nThe transformation is calculated as (when ``axis=0``)::\n\n X_scaled = scale * X + min - X.min(axis=0) * scale\n where scale = (max - min) / (X.max(axis=0) - X.min(axis=0))\n\nThis transformation is often used as an alternative to zero mean,\nunit variance scaling.\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.17\n *minmax_scale* function interface\n to :class:`~sklearn.preprocessing.MinMaxScaler`.\n\nParameters\n----------\nX : array-like of shape (n_samples, n_features)\n The data.\n\nfeature_range : tuple (min, max), default=(0, 1)\n Desired range of transformed data.\n\naxis : int, default=0\n Axis used to scale along. If 0, independently scale each feature,\n otherwise (if 1) scale each sample.\n\ncopy : bool, default=True\n Set to False to perform inplace scaling and avoid a copy (if the input\n is already a numpy array).\n\nReturns\n-------\nX_tr : ndarray of shape (n_samples, n_features)\n The transformed data.\n\n.. warning:: Risk of data leak\n\n Do not use :func:`~sklearn.preprocessing.minmax_scale` unless you know\n what you are doing. A common mistake is to apply it to the entire data\n *before* splitting into training and test sets. This will bias the\n model evaluation because information would have leaked from the test\n set to the training set.\n In general, we recommend using\n :class:`~sklearn.preprocessing.MinMaxScaler` within a\n :ref:`Pipeline ` in order to prevent most risks of data\n leaking: `pipe = make_pipeline(MinMaxScaler(), LogisticRegression())`.\n\nSee Also\n--------\nMinMaxScaler : Performs scaling to a given range using the Transformer\n API (e.g. as part of a preprocessing\n :class:`~sklearn.pipeline.Pipeline`).\n\nNotes\n-----\nFor a comparison of the different scalers, transformers, and normalizers,\nsee :ref:`examples/preprocessing/plot_all_scaling.py\n`." }, { @@ -223192,7 +223192,7 @@ }, "type": { "kind": "EnumType", - "values": ["l2", "l1", "max"] + "values": ["max", "l1", "l2"] } }, { @@ -223286,7 +223286,7 @@ "docstring": { "type": "{'yeo-johnson', 'box-cox'}", "default_value": "'yeo-johnson'", - "description": "The power transform method. Available methods are:\n\n- 'yeo-johnson' [1]_, works with positive and negative values\n- 'box-cox' [2]_, only works with strictly positive values\n\n.. versionchanged:: 0.23\nThe default value of the `method` parameter changed from\n'box-cox' to 'yeo-johnson' in 0.23." + "description": "The power transform method. Available methods are:\n\n- 'yeo-johnson' [1]_, works with positive and negative values\n- 'box-cox' [2]_, only works with strictly positive values\n\n.. versionchanged:: 0.23\n The default value of the `method` parameter changed from\n 'box-cox' to 'yeo-johnson' in 0.23." }, "type": { "kind": "EnumType", @@ -223414,7 +223414,7 @@ }, "type": { "kind": "EnumType", - "values": ["normal", "uniform"] + "values": ["uniform", "normal"] } }, { @@ -223491,7 +223491,7 @@ "docstring": { "type": "bool", "default_value": "True", - "description": "Set to False to perform inplace transformation and avoid a copy (if the\ninput is already a numpy array). If True, a copy of `X` is transformed,\nleaving the original `X` unchanged\n\n..versionchanged:: 0.23\nThe default value of `copy` changed from False to True in 0.23." 
+ "description": "Set to False to perform inplace transformation and avoid a copy (if the\ninput is already a numpy array). If True, a copy of `X` is transformed,\nleaving the original `X` unchanged\n\n..versionchanged:: 0.23\n The default value of `copy` changed from False to True in 0.23." }, "type": { "kind": "NamedType", @@ -223818,11 +223818,11 @@ "docstring": { "type": "{'onehot', 'onehot-dense', 'ordinal'}", "default_value": "'onehot'", - "description": "Method used to encode the transformed result.\n\n- 'onehot': Encode the transformed result with one-hot encoding\nand return a sparse matrix. Ignored features are always\nstacked to the right.\n- 'onehot-dense': Encode the transformed result with one-hot encoding\nand return a dense array. Ignored features are always\nstacked to the right.\n- 'ordinal': Return the bin identifier encoded as an integer value." + "description": "Method used to encode the transformed result.\n\n- 'onehot': Encode the transformed result with one-hot encoding\n and return a sparse matrix. Ignored features are always\n stacked to the right.\n- 'onehot-dense': Encode the transformed result with one-hot encoding\n and return a dense array. Ignored features are always\n stacked to the right.\n- 'ordinal': Return the bin identifier encoded as an integer value." }, "type": { "kind": "EnumType", - "values": ["onehot", "onehot-dense", "ordinal"] + "values": ["onehot-dense", "ordinal", "onehot"] } }, { @@ -223835,11 +223835,11 @@ "docstring": { "type": "{'uniform', 'quantile', 'kmeans'}", "default_value": "'quantile'", - "description": "Strategy used to define the widths of the bins.\n\n- 'uniform': All bins in each feature have identical widths.\n- 'quantile': All bins in each feature have the same number of points.\n- 'kmeans': Values in each bin have the same nearest center of a 1D\nk-means cluster." + "description": "Strategy used to define the widths of the bins.\n\n- 'uniform': All bins in each feature have identical widths.\n- 'quantile': All bins in each feature have the same number of points.\n- 'kmeans': Values in each bin have the same nearest center of a 1D\n k-means cluster." }, "type": { "kind": "EnumType", - "values": ["kmeans", "uniform", "quantile"] + "values": ["uniform", "kmeans", "quantile"] } }, { @@ -223869,7 +223869,7 @@ "docstring": { "type": "int or None (default='warn')", "default_value": "", - "description": "Maximum number of samples, used to fit the model, for computational\nefficiency. Used when `strategy=\"quantile\"`.\n`subsample=None` means that all the training samples are used when\ncomputing the quantiles that determine the binning thresholds.\nSince quantile computation relies on sorting each column of `X` and\nthat sorting has an `n log(n)` time complexity,\nit is recommended to use subsampling on datasets with a\nvery large number of samples.\n\n.. deprecated:: 1.1\nIn version 1.3 and onwards, `subsample=2e5` will be the default." + "description": "Maximum number of samples, used to fit the model, for computational\nefficiency. Used when `strategy=\"quantile\"`.\n`subsample=None` means that all the training samples are used when\ncomputing the quantiles that determine the binning thresholds.\nSince quantile computation relies on sorting each column of `X` and\nthat sorting has an `n log(n)` time complexity,\nit is recommended to use subsampling on datasets with a\nvery large number of samples.\n\n.. deprecated:: 1.1\n In version 1.3 and onwards, `subsample=2e5` will be the default." 
}, "type": { "kind": "UnionType", @@ -224054,7 +224054,7 @@ "docstring": { "type": "array-like of str or None", "default_value": "None", - "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\nused as feature names in. If `feature_names_in_` is not defined,\nthen the following input feature names are generated:\n`[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\nmatch `feature_names_in_` if `feature_names_in_` is defined." + "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\n used as feature names in. If `feature_names_in_` is not defined,\n then the following input feature names are generated:\n `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\n match `feature_names_in_` if `feature_names_in_` is defined." }, "type": { "kind": "UnionType", @@ -224195,7 +224195,7 @@ "docstring": { "type": "'auto' or a list of array-like", "default_value": "'auto'", - "description": "Categories (unique values) per feature:\n\n- 'auto' : Determine categories automatically from the training data.\n- list : ``categories[i]`` holds the categories expected in the ith\ncolumn. The passed categories should not mix strings and numeric\nvalues within a single feature, and should be sorted in case of\nnumeric values.\n\nThe used categories can be found in the ``categories_`` attribute.\n\n.. versionadded:: 0.20" + "description": "Categories (unique values) per feature:\n\n- 'auto' : Determine categories automatically from the training data.\n- list : ``categories[i]`` holds the categories expected in the ith\n column. The passed categories should not mix strings and numeric\n values within a single feature, and should be sorted in case of\n numeric values.\n\nThe used categories can be found in the ``categories_`` attribute.\n\n.. versionadded:: 0.20" }, "type": { "kind": "UnionType", @@ -224221,14 +224221,14 @@ "docstring": { "type": "{'first', 'if_binary'} or an array-like of shape (n_features,)", "default_value": "None", - "description": "Specifies a methodology to use to drop one of the categories per\nfeature. This is useful in situations where perfectly collinear\nfeatures cause problems, such as when feeding the resulting data\ninto an unregularized linear regression model.\n\nHowever, dropping one category breaks the symmetry of the original\nrepresentation and can therefore induce a bias in downstream models,\nfor instance for penalized linear classification or regression models.\n\n- None : retain all features (the default).\n- 'first' : drop the first category in each feature. If only one\ncategory is present, the feature will be dropped entirely.\n- 'if_binary' : drop the first category in each feature with two\ncategories. Features with 1 or more than 2 categories are\nleft intact.\n- array : ``drop[i]`` is the category in feature ``X[:, i]`` that\nshould be dropped.\n\n.. versionadded:: 0.21\nThe parameter `drop` was added in 0.21.\n\n.. versionchanged:: 0.23\nThe option `drop='if_binary'` was added in 0.23.\n\n.. versionchanged:: 1.1\nSupport for dropping infrequent categories." + "description": "Specifies a methodology to use to drop one of the categories per\nfeature. 
This is useful in situations where perfectly collinear\nfeatures cause problems, such as when feeding the resulting data\ninto an unregularized linear regression model.\n\nHowever, dropping one category breaks the symmetry of the original\nrepresentation and can therefore induce a bias in downstream models,\nfor instance for penalized linear classification or regression models.\n\n- None : retain all features (the default).\n- 'first' : drop the first category in each feature. If only one\n category is present, the feature will be dropped entirely.\n- 'if_binary' : drop the first category in each feature with two\n categories. Features with 1 or more than 2 categories are\n left intact.\n- array : ``drop[i]`` is the category in feature ``X[:, i]`` that\n should be dropped.\n\n.. versionadded:: 0.21\n The parameter `drop` was added in 0.21.\n\n.. versionchanged:: 0.23\n The option `drop='if_binary'` was added in 0.23.\n\n.. versionchanged:: 1.1\n Support for dropping infrequent categories." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["if_binary", "first"] + "values": ["first", "if_binary"] }, { "kind": "NamedType", @@ -224281,7 +224281,7 @@ "docstring": { "type": "{'error', 'ignore', 'infrequent_if_exist'}", "default_value": "'error'", - "description": "Specifies the way unknown categories are handled during :meth:`transform`.\n\n- 'error' : Raise an error if an unknown category is present during transform.\n- 'ignore' : When an unknown category is encountered during\ntransform, the resulting one-hot encoded columns for this feature\nwill be all zeros. In the inverse transform, an unknown category\nwill be denoted as None.\n- 'infrequent_if_exist' : When an unknown category is encountered\nduring transform, the resulting one-hot encoded columns for this\nfeature will map to the infrequent category if it exists. The\ninfrequent category will be mapped to the last position in the\nencoding. During inverse transform, an unknown category will be\nmapped to the category denoted `'infrequent'` if it exists. If the\n`'infrequent'` category does not exist, then :meth:`transform` and\n:meth:`inverse_transform` will handle an unknown category as with\n`handle_unknown='ignore'`. Infrequent categories exist based on\n`min_frequency` and `max_categories`. Read more in the\n:ref:`User Guide `.\n\n.. versionchanged:: 1.1\n`'infrequent_if_exist'` was added to automatically handle unknown\ncategories and infrequent categories." + "description": "Specifies the way unknown categories are handled during :meth:`transform`.\n\n- 'error' : Raise an error if an unknown category is present during transform.\n- 'ignore' : When an unknown category is encountered during\n transform, the resulting one-hot encoded columns for this feature\n will be all zeros. In the inverse transform, an unknown category\n will be denoted as None.\n- 'infrequent_if_exist' : When an unknown category is encountered\n during transform, the resulting one-hot encoded columns for this\n feature will map to the infrequent category if it exists. The\n infrequent category will be mapped to the last position in the\n encoding. During inverse transform, an unknown category will be\n mapped to the category denoted `'infrequent'` if it exists. If the\n `'infrequent'` category does not exist, then :meth:`transform` and\n :meth:`inverse_transform` will handle an unknown category as with\n `handle_unknown='ignore'`. Infrequent categories exist based on\n `min_frequency` and `max_categories`. 
Read more in the\n :ref:`User Guide `.\n\n.. versionchanged:: 1.1\n `'infrequent_if_exist'` was added to automatically handle unknown\n categories and infrequent categories." }, "type": { "kind": "EnumType", @@ -224298,7 +224298,7 @@ "docstring": { "type": "int or float", "default_value": "None", - "description": "Specifies the minimum frequency below which a category will be\nconsidered infrequent.\n\n- If `int`, categories with a smaller cardinality will be considered\ninfrequent.\n\n- If `float`, categories with a smaller cardinality than\n`min_frequency * n_samples` will be considered infrequent.\n\n.. versionadded:: 1.1\nRead more in the :ref:`User Guide `." + "description": "Specifies the minimum frequency below which a category will be\nconsidered infrequent.\n\n- If `int`, categories with a smaller cardinality will be considered\n infrequent.\n\n- If `float`, categories with a smaller cardinality than\n `min_frequency * n_samples` will be considered infrequent.\n\n.. versionadded:: 1.1\n Read more in the :ref:`User Guide `." }, "type": { "kind": "UnionType", @@ -224324,7 +224324,7 @@ "docstring": { "type": "int", "default_value": "None", - "description": "Specifies an upper limit to the number of output features for each input\nfeature when considering infrequent categories. If there are infrequent\ncategories, `max_categories` includes the category representing the\ninfrequent categories along with the frequent categories. If `None`,\nthere is no limit to the number of output features.\n\n.. versionadded:: 1.1\nRead more in the :ref:`User Guide `." + "description": "Specifies an upper limit to the number of output features for each input\nfeature when considering infrequent categories. If there are infrequent\ncategories, `max_categories` includes the category representing the\ninfrequent categories along with the frequent categories. If `None`,\nthere is no limit to the number of output features.\n\n.. versionadded:: 1.1\n Read more in the :ref:`User Guide `." }, "type": { "kind": "NamedType", @@ -224362,7 +224362,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Compute the drop indices associated with `self.categories_`.\n\nIf `self.drop` is:\n- `None`, returns `None`.\n- `'first'`, returns all zeros to drop the first category.\n- `'if_binary'`, returns zero if the category is binary and `None`\notherwise.\n- array-like, returns the indices of the categories that match the\ncategories in `self.drop`. If the dropped category is an infrequent\ncategory, then the index for the infrequent category is used. This\nmeans that the entire infrequent category is dropped.", + "description": "Compute the drop indices associated with `self.categories_`.\n\nIf `self.drop` is:\n- `None`, returns `None`.\n- `'first'`, returns all zeros to drop the first category.\n- `'if_binary'`, returns zero if the category is binary and `None`\n otherwise.\n- array-like, returns the indices of the categories that match the\n categories in `self.drop`. If the dropped category is an infrequent\n category, then the index for the infrequent category is used. This\n means that the entire infrequent category is dropped.", "docstring": "Compute the drop indices associated with `self.categories_`.\n\nIf `self.drop` is:\n- `None`, returns `None`.\n- `'first'`, returns all zeros to drop the first category.\n- `'if_binary'`, returns zero if the category is binary and `None`\n otherwise.\n- array-like, returns the indices of the categories that match the\n categories in `self.drop`. 
If the dropped category is an infrequent\n category, then the index for the infrequent category is used. This\n means that the entire infrequent category is dropped." }, { @@ -224973,7 +224973,7 @@ "docstring": { "type": "array-like of str or None", "default_value": "None", - "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\nused as feature names in. If `feature_names_in_` is not defined,\nthen the following input feature names are generated:\n`[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\nmatch `feature_names_in_` if `feature_names_in_` is defined." + "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\n used as feature names in. If `feature_names_in_` is not defined,\n then the following input feature names are generated:\n `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\n match `feature_names_in_` if `feature_names_in_` is defined." }, "type": { "kind": "UnionType", @@ -225150,7 +225150,7 @@ "docstring": { "type": "'auto' or a list of array-like", "default_value": "'auto'", - "description": "Categories (unique values) per feature:\n\n- 'auto' : Determine categories automatically from the training data.\n- list : ``categories[i]`` holds the categories expected in the ith\ncolumn. The passed categories should not mix strings and numeric\nvalues, and should be sorted in case of numeric values.\n\nThe used categories can be found in the ``categories_`` attribute." + "description": "Categories (unique values) per feature:\n\n- 'auto' : Determine categories automatically from the training data.\n- list : ``categories[i]`` holds the categories expected in the ith\n column. The passed categories should not mix strings and numeric\n values, and should be sorted in case of numeric values.\n\nThe used categories can be found in the ``categories_`` attribute." }, "type": { "kind": "UnionType", @@ -225197,7 +225197,7 @@ }, "type": { "kind": "EnumType", - "values": ["use_encoded_value", "error"] + "values": ["error", "use_encoded_value"] } }, { @@ -225460,7 +225460,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Perform custom check_array:\n- convert list of strings to object dtype\n- check for missing values for object dtype data (check_array does\nnot do that)\n- return list of features (arrays): this list of features is\nconstructed feature by feature to preserve the data types\nof pandas DataFrame columns, as otherwise information is lost\nand cannot be used, e.g. for the `categories_` attribute.", + "description": "Perform custom check_array:\n- convert list of strings to object dtype\n- check for missing values for object dtype data (check_array does\n not do that)\n- return list of features (arrays): this list of features is\n constructed feature by feature to preserve the data types\n of pandas DataFrame columns, as otherwise information is lost\n and cannot be used, e.g. for the `categories_` attribute.", "docstring": "Perform custom check_array:\n- convert list of strings to object dtype\n- check for missing values for object dtype data (check_array does\n not do that)\n- return list of features (arrays): this list of features is\n constructed feature by feature to preserve the data types\n of pandas DataFrame columns, as otherwise information is lost\n and cannot be used, e.g. for the `categories_` attribute." 
}, { @@ -225775,7 +225775,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "Indicate that the input X array should be checked before calling\n``func``. The possibilities are:\n\n- If False, there is no input validation.\n- If True, then X will be converted to a 2-dimensional NumPy array or\nsparse matrix. If the conversion is not possible an exception is\nraised.\n\n.. versionchanged:: 0.22\nThe default of ``validate`` changed from True to False." + "description": "Indicate that the input X array should be checked before calling\n``func``. The possibilities are:\n\n- If False, there is no input validation.\n- If True, then X will be converted to a 2-dimensional NumPy array or\n sparse matrix. If the conversion is not possible an exception is\n raised.\n\n.. versionchanged:: 0.22\n The default of ``validate`` changed from True to False." }, "type": { "kind": "NamedType", @@ -226206,7 +226206,7 @@ "docstring": { "type": "array-like of str or None", "default_value": "None", - "description": "Input feature names.\n\n- If `input_features` is None, then `feature_names_in_` is\nused as the input feature names. If `feature_names_in_` is not\ndefined, then names are generated:\n`[x0, x1, ..., x(n_features_in_ - 1)]`.\n- If `input_features` is array-like, then `input_features` must\nmatch `feature_names_in_` if `feature_names_in_` is defined." + "description": "Input feature names.\n\n- If `input_features` is None, then `feature_names_in_` is\n used as the input feature names. If `feature_names_in_` is not\n defined, then names are generated:\n `[x0, x1, ..., x(n_features_in_ - 1)]`.\n- If `input_features` is array-like, then `input_features` must\n match `feature_names_in_` if `feature_names_in_` is defined." }, "type": { "kind": "UnionType", @@ -227515,7 +227515,7 @@ "docstring": { "type": "bool", "default_value": "False", - "description": "If `True`, only interaction features are produced: features that are\nproducts of at most `degree` *distinct* input features, i.e. terms with\npower of 2 or higher of the same input feature are excluded:\n\n- included: `x[0]`, `x[1]`, `x[0] * x[1]`, etc.\n- excluded: `x[0] ** 2`, `x[0] ** 2 * x[1]`, etc." + "description": "If `True`, only interaction features are produced: features that are\nproducts of at most `degree` *distinct* input features, i.e. terms with\npower of 2 or higher of the same input feature are excluded:\n\n - included: `x[0]`, `x[1]`, `x[0] * x[1]`, etc.\n - excluded: `x[0] ** 2`, `x[0] ** 2 * x[1]`, etc." }, "type": { "kind": "NamedType", @@ -227553,7 +227553,7 @@ }, "type": { "kind": "EnumType", - "values": ["C", "F"] + "values": ["F", "C"] } } ], @@ -227875,7 +227875,7 @@ "docstring": { "type": "array-like of str or None", "default_value": "None", - "description": "Input features.\n\n- If `input_features is None`, then `feature_names_in_` is\nused as feature names in. If `feature_names_in_` is not defined,\nthen the following input feature names are generated:\n`[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\nmatch `feature_names_in_` if `feature_names_in_` is defined." + "description": "Input features.\n\n- If `input_features is None`, then `feature_names_in_` is\n used as feature names in. 
If `feature_names_in_` is not defined,\n then the following input feature names are generated:\n `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\n match `feature_names_in_` if `feature_names_in_` is defined." }, "type": { "kind": "UnionType", @@ -228072,7 +228072,7 @@ "docstring": { "type": "{'uniform', 'quantile'} or array-like of shape (n_knots, n_features)", "default_value": "'uniform'", - "description": "Set knot positions such that first knot <= features <= last knot.\n\n- If 'uniform', `n_knots` number of knots are distributed uniformly\nfrom min to max values of the features.\n- If 'quantile', they are distributed uniformly along the quantiles of\nthe features.\n- If an array-like is given, it directly specifies the sorted knot\npositions including the boundary knots. Note that, internally,\n`degree` number of knots are added before the first knot, the same\nafter the last knot." + "description": "Set knot positions such that first knot <= features <= last knot.\n\n- If 'uniform', `n_knots` number of knots are distributed uniformly\n from min to max values of the features.\n- If 'quantile', they are distributed uniformly along the quantiles of\n the features.\n- If an array-like is given, it directly specifies the sorted knot\n positions including the boundary knots. Note that, internally,\n `degree` number of knots are added before the first knot, the same\n after the last knot." }, "type": { "kind": "UnionType", @@ -228102,7 +228102,7 @@ }, "type": { "kind": "EnumType", - "values": ["linear", "constant", "continue", "error", "periodic"] + "values": ["periodic", "linear", "continue", "constant", "error"] } }, { @@ -228136,7 +228136,7 @@ }, "type": { "kind": "EnumType", - "values": ["C", "F"] + "values": ["F", "C"] } } ], @@ -228369,7 +228369,7 @@ "docstring": { "type": "array-like of str or None", "default_value": "None", - "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\nused as feature names in. If `feature_names_in_` is not defined,\nthen the following input feature names are generated:\n`[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\nmatch `feature_names_in_` if `feature_names_in_` is defined." + "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\n used as feature names in. If `feature_names_in_` is not defined,\n then the following input feature names are generated:\n `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\n match `feature_names_in_` if `feature_names_in_` is defined." }, "type": { "kind": "UnionType", @@ -229221,7 +229221,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Reduce dimensionality through sparse random projection.\n\nSparse random matrix is an alternative to dense random\nprojection matrix that guarantees similar embedding quality while being\nmuch more memory efficient and allowing faster computation of the\nprojected data.\n\nIf we note `s = 1 / density` the components of the random matrix are\ndrawn from:\n\n- -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n- 0 with probability 1 - 1 / s\n- +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\n.. 
versionadded:: 0.13", + "description": "Reduce dimensionality through sparse random projection.\n\nSparse random matrix is an alternative to dense random\nprojection matrix that guarantees similar embedding quality while being\nmuch more memory efficient and allowing faster computation of the\nprojected data.\n\nIf we note `s = 1 / density` the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\n.. versionadded:: 0.13", "docstring": "" }, { @@ -229494,7 +229494,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Generate a dense Gaussian random matrix.\n\nThe components of the random matrix are drawn from\n\nN(0, 1.0 / n_components).\n\nRead more in the :ref:`User Guide `.", + "description": "Generate a dense Gaussian random matrix.\n\nThe components of the random matrix are drawn from\n\n N(0, 1.0 / n_components).\n\nRead more in the :ref:`User Guide `.", "docstring": "Generate a dense Gaussian random matrix.\n\nThe components of the random matrix are drawn from\n\n N(0, 1.0 / n_components).\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components : int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the matrix\n at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ncomponents : ndarray of shape (n_components, n_features)\n The generated Gaussian random matrix.\n\nSee Also\n--------\nGaussianRandomProjection" }, { @@ -229597,7 +229597,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Generalized Achlioptas random sparse matrix for random projection.\n\nSetting density to 1 / 3 will yield the original matrix by Dimitris\nAchlioptas while setting a lower value will yield the generalization\nby Ping Li et al.\n\nIf we note :math:`s = 1 / density`, the components of the random matrix are\ndrawn from:\n\n- -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n- 0 with probability 1 - 1 / s\n- +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.", + "description": "Generalized Achlioptas random sparse matrix for random projection.\n\nSetting density to 1 / 3 will yield the original matrix by Dimitris\nAchlioptas while setting a lower value will yield the generalization\nby Ping Li et al.\n\nIf we note :math:`s = 1 / density`, the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.", "docstring": "Generalized Achlioptas random sparse matrix for random projection.\n\nSetting density to 1 / 3 will yield the original matrix by Dimitris\nAchlioptas while setting a lower value will yield the generalization\nby Ping Li et al.\n\nIf we note :math:`s = 1 / density`, the components of the random matrix are\ndrawn from:\n\n - -sqrt(s) / sqrt(n_components) with probability 1 / 2s\n - 0 with probability 1 - 1 / s\n - +sqrt(s) / sqrt(n_components) with probability 1 / 2s\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_components 
: int,\n Dimensionality of the target projection space.\n\nn_features : int,\n Dimensionality of the original source space.\n\ndensity : float or 'auto', default='auto'\n Ratio of non-zero component in the random projection matrix in the\n range `(0, 1]`\n\n If density = 'auto', the value is set to the minimum density\n as recommended by Ping Li et al.: 1 / sqrt(n_features).\n\n Use density = 1 / 3.0 if you want to reproduce the results from\n Achlioptas, 2001.\n\nrandom_state : int, RandomState instance or None, default=None\n Controls the pseudo random number generator used to generate the matrix\n at fit time.\n Pass an int for reproducible output across multiple function calls.\n See :term:`Glossary `.\n\nReturns\n-------\ncomponents : {ndarray, sparse matrix} of shape (n_components, n_features)\n The generated Gaussian random matrix. Sparse matrix will be of CSR\n format.\n\nSee Also\n--------\nSparseRandomProjection\n\nReferences\n----------\n\n.. [1] Ping Li, T. Hastie and K. W. Church, 2006,\n \"Very Sparse Random Projections\".\n https://web.stanford.edu/~hastie/Papers/Ping/KDD06_rp.pdf\n\n.. [2] D. Achlioptas, 2001, \"Database-friendly random projections\",\n http://www.cs.ucsc.edu/~optas/papers/jl.pdf" }, { @@ -229666,7 +229666,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Find a 'safe' number of components to randomly project to.\n\nThe distortion introduced by a random projection `p` only changes the\ndistance between two points by a factor (1 +- eps) in an euclidean space\nwith good probability. The projection `p` is an eps-embedding as defined\nby:\n\n(1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2\n\nWhere u and v are any rows taken from a dataset of shape (n_samples,\nn_features), eps is in ]0, 1[ and p is a projection by a random Gaussian\nN(0, 1) matrix of shape (n_components, n_features) (or a sparse\nAchlioptas matrix).\n\nThe minimum number of components to guarantee the eps-embedding is\ngiven by:\n\nn_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)\n\nNote that the number of dimensions is independent of the original\nnumber of features but instead depends on the size of the dataset:\nthe larger the dataset, the higher is the minimal dimensionality of\nan eps-embedding.\n\nRead more in the :ref:`User Guide `.", + "description": "Find a 'safe' number of components to randomly project to.\n\nThe distortion introduced by a random projection `p` only changes the\ndistance between two points by a factor (1 +- eps) in an euclidean space\nwith good probability. 
The projection `p` is an eps-embedding as defined\nby:\n\n (1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2\n\nWhere u and v are any rows taken from a dataset of shape (n_samples,\nn_features), eps is in ]0, 1[ and p is a projection by a random Gaussian\nN(0, 1) matrix of shape (n_components, n_features) (or a sparse\nAchlioptas matrix).\n\nThe minimum number of components to guarantee the eps-embedding is\ngiven by:\n\n n_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)\n\nNote that the number of dimensions is independent of the original\nnumber of features but instead depends on the size of the dataset:\nthe larger the dataset, the higher is the minimal dimensionality of\nan eps-embedding.\n\nRead more in the :ref:`User Guide `.", "docstring": "Find a 'safe' number of components to randomly project to.\n\nThe distortion introduced by a random projection `p` only changes the\ndistance between two points by a factor (1 +- eps) in an euclidean space\nwith good probability. The projection `p` is an eps-embedding as defined\nby:\n\n (1 - eps) ||u - v||^2 < ||p(u) - p(v)||^2 < (1 + eps) ||u - v||^2\n\nWhere u and v are any rows taken from a dataset of shape (n_samples,\nn_features), eps is in ]0, 1[ and p is a projection by a random Gaussian\nN(0, 1) matrix of shape (n_components, n_features) (or a sparse\nAchlioptas matrix).\n\nThe minimum number of components to guarantee the eps-embedding is\ngiven by:\n\n n_components >= 4 log(n_samples) / (eps^2 / 2 - eps^3 / 3)\n\nNote that the number of dimensions is independent of the original\nnumber of features but instead depends on the size of the dataset:\nthe larger the dataset, the higher is the minimal dimensionality of\nan eps-embedding.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\nn_samples : int or array-like of int\n Number of samples that should be a integer greater than 0. If an array\n is given, it will compute a safe number of components array-wise.\n\neps : float or ndarray of shape (n_components,), dtype=float, default=0.1\n Maximum distortion rate in the range (0,1 ) as defined by the\n Johnson-Lindenstrauss lemma. If an array is given, it will compute a\n safe number of components array-wise.\n\nReturns\n-------\nn_components : int or ndarray of int\n The minimal number of components to guarantee with good probability\n an eps-embedding with n_samples.\n\nExamples\n--------\n>>> from sklearn.random_projection import johnson_lindenstrauss_min_dim\n>>> johnson_lindenstrauss_min_dim(1e6, eps=0.5)\n663\n\n>>> johnson_lindenstrauss_min_dim(1e6, eps=[0.5, 0.1, 0.01])\narray([ 663, 11841, 1112658])\n\n>>> johnson_lindenstrauss_min_dim([1e4, 1e5, 1e6], eps=0.1)\narray([ 7894, 9868, 11841])\n\nReferences\n----------\n\n.. [1] https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma\n\n.. [2] Sanjoy Dasgupta and Anupam Gupta, 1999,\n \"An elementary proof of the Johnson-Lindenstrauss Lemma.\"\n http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.45.3654" }, { @@ -229699,14 +229699,14 @@ "docstring": { "type": "{'knn', 'rbf'} or callable", "default_value": "'rbf'", - "description": "String identifier for kernel function to use or the kernel function\nitself. Only 'rbf' and 'knn' strings are valid inputs. The function\npassed should take two inputs, each of shape (n_samples, n_features),\nand return a (n_samples, n_samples) shaped weight matrix.\n\ngamma : float, default=20\nParameter for rbf kernel.\n\nn_neighbors : int, default=7\nParameter for knn kernel. 
Need to be strictly positive.\n\nalpha : float, default=1.0\nClamping factor.\n\nmax_iter : int, default=30\nChange maximum number of iterations allowed.\n\ntol : float, default=1e-3\nConvergence tolerance: threshold to consider the system at steady\nstate." + "description": " String identifier for kernel function to use or the kernel function\n itself. Only 'rbf' and 'knn' strings are valid inputs. The function\n passed should take two inputs, each of shape (n_samples, n_features),\n and return a (n_samples, n_samples) shaped weight matrix.\n\ngamma : float, default=20\n Parameter for rbf kernel.\n\nn_neighbors : int, default=7\n Parameter for knn kernel. Need to be strictly positive.\n\nalpha : float, default=1.0\n Clamping factor.\n\nmax_iter : int, default=30\n Change maximum number of iterations allowed.\n\ntol : float, default=1e-3\n Convergence tolerance: threshold to consider the system at steady\n state." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["rbf", "knn"] + "values": ["knn", "rbf"] }, { "kind": "NamedType", @@ -230077,7 +230077,7 @@ "types": [ { "kind": "EnumType", - "values": ["rbf", "knn"] + "values": ["knn", "rbf"] }, { "kind": "NamedType", @@ -230312,7 +230312,7 @@ "types": [ { "kind": "EnumType", - "values": ["rbf", "knn"] + "values": ["knn", "rbf"] }, { "kind": "NamedType", @@ -232896,7 +232896,7 @@ }, "type": { "kind": "EnumType", - "values": ["l2", "l1"] + "values": ["l1", "l2"] } }, { @@ -233138,7 +233138,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Find the liblinear magic number for the solver.\n\nThis number depends on the values of the following attributes:\n- multi_class\n- penalty\n- loss\n- dual\n\nThe same number is also internally used by LibLinear to determine\nwhich solver to use.", + "description": "Find the liblinear magic number for the solver.\n\nThis number depends on the values of the following attributes:\n - multi_class\n - penalty\n - loss\n - dual\n\nThe same number is also internally used by LibLinear to determine\nwhich solver to use.", "docstring": "Find the liblinear magic number for the solver.\n\nThis number depends on the values of the following attributes:\n - multi_class\n - penalty\n - loss\n - dual\n\nThe same number is also internally used by LibLinear to determine\nwhich solver to use." }, { @@ -233337,7 +233337,7 @@ }, "type": { "kind": "EnumType", - "values": ["l2", "l1"] + "values": ["l1", "l2"] } }, { @@ -234061,7 +234061,7 @@ "types": [ { "kind": "EnumType", - "values": ["poly", "linear", "precomputed", "sigmoid", "rbf"] + "values": ["rbf", "linear", "poly", "precomputed", "sigmoid"] }, { "kind": "NamedType", @@ -234097,7 +234097,7 @@ "docstring": { "type": "{'scale', 'auto'} or float", "default_value": "'scale'", - "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\nThe default value of ``gamma`` changed from 'auto' to 'scale'." + "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'." 
}, "type": { "kind": "UnionType", @@ -234259,7 +234259,7 @@ "docstring": { "type": "{'ovo', 'ovr'}", "default_value": "'ovr'", - "description": "Whether to return a one-vs-rest ('ovr') decision function of shape\n(n_samples, n_classes) as all other classifiers, or the original\none-vs-one ('ovo') decision function of libsvm which has shape\n(n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n('ovo') is always used as multi-class strategy. The parameter is\nignored for binary classification.\n\n.. versionchanged:: 0.19\ndecision_function_shape is 'ovr' by default.\n\n.. versionadded:: 0.17\n*decision_function_shape='ovr'* is recommended.\n\n.. versionchanged:: 0.17\nDeprecated *decision_function_shape='ovo' and None*." + "description": "Whether to return a one-vs-rest ('ovr') decision function of shape\n(n_samples, n_classes) as all other classifiers, or the original\none-vs-one ('ovo') decision function of libsvm which has shape\n(n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n('ovo') is always used as multi-class strategy. The parameter is\nignored for binary classification.\n\n.. versionchanged:: 0.19\n decision_function_shape is 'ovr' by default.\n\n.. versionadded:: 0.17\n *decision_function_shape='ovr'* is recommended.\n\n.. versionchanged:: 0.17\n Deprecated *decision_function_shape='ovo' and None*." }, "type": { "kind": "EnumType", @@ -234431,7 +234431,7 @@ "types": [ { "kind": "EnumType", - "values": ["poly", "linear", "precomputed", "sigmoid", "rbf"] + "values": ["rbf", "linear", "poly", "precomputed", "sigmoid"] }, { "kind": "NamedType", @@ -234467,7 +234467,7 @@ "docstring": { "type": "{'scale', 'auto'} or float", "default_value": "'scale'", - "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\nThe default value of ``gamma`` changed from 'auto' to 'scale'." + "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'." }, "type": { "kind": "UnionType", @@ -234656,7 +234656,7 @@ "types": [ { "kind": "EnumType", - "values": ["poly", "linear", "precomputed", "sigmoid", "rbf"] + "values": ["rbf", "linear", "poly", "precomputed", "sigmoid"] }, { "kind": "NamedType", @@ -234692,7 +234692,7 @@ "docstring": { "type": "{'scale', 'auto'} or float", "default_value": "'scale'", - "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\nThe default value of ``gamma`` changed from 'auto' to 'scale'." + "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'." }, "type": { "kind": "UnionType", @@ -235008,7 +235008,7 @@ "docstring": { "type": "dict", "default_value": "", - "description": "Additional fit parameters.\n\n.. deprecated:: 1.0\nThe `fit` method will not longer accept extra keyword\nparameters in 1.2. 
These keyword parameters were\nalready discarded." + "description": "Additional fit parameters.\n\n.. deprecated:: 1.0\n The `fit` method will not longer accept extra keyword\n parameters in 1.2. These keyword parameters were\n already discarded." }, "type": { "kind": "NamedType", @@ -235173,7 +235173,7 @@ "types": [ { "kind": "EnumType", - "values": ["poly", "linear", "precomputed", "sigmoid", "rbf"] + "values": ["rbf", "linear", "poly", "precomputed", "sigmoid"] }, { "kind": "NamedType", @@ -235209,7 +235209,7 @@ "docstring": { "type": "{'scale', 'auto'} or float", "default_value": "'scale'", - "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\nThe default value of ``gamma`` changed from 'auto' to 'scale'." + "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'." }, "type": { "kind": "UnionType", @@ -235380,7 +235380,7 @@ "docstring": { "type": "{'ovo', 'ovr'}", "default_value": "'ovr'", - "description": "Whether to return a one-vs-rest ('ovr') decision function of shape\n(n_samples, n_classes) as all other classifiers, or the original\none-vs-one ('ovo') decision function of libsvm which has shape\n(n_samples, n_classes * (n_classes - 1) / 2). However, note that\ninternally, one-vs-one ('ovo') is always used as a multi-class strategy\nto train models; an ovr matrix is only constructed from the ovo matrix.\nThe parameter is ignored for binary classification.\n\n.. versionchanged:: 0.19\ndecision_function_shape is 'ovr' by default.\n\n.. versionadded:: 0.17\n*decision_function_shape='ovr'* is recommended.\n\n.. versionchanged:: 0.17\nDeprecated *decision_function_shape='ovo' and None*." + "description": "Whether to return a one-vs-rest ('ovr') decision function of shape\n(n_samples, n_classes) as all other classifiers, or the original\none-vs-one ('ovo') decision function of libsvm which has shape\n(n_samples, n_classes * (n_classes - 1) / 2). However, note that\ninternally, one-vs-one ('ovo') is always used as a multi-class strategy\nto train models; an ovr matrix is only constructed from the ovo matrix.\nThe parameter is ignored for binary classification.\n\n.. versionchanged:: 0.19\n decision_function_shape is 'ovr' by default.\n\n.. versionadded:: 0.17\n *decision_function_shape='ovr'* is recommended.\n\n.. versionchanged:: 0.17\n Deprecated *decision_function_shape='ovo' and None*." }, "type": { "kind": "EnumType", @@ -235505,7 +235505,7 @@ "types": [ { "kind": "EnumType", - "values": ["poly", "linear", "precomputed", "sigmoid", "rbf"] + "values": ["rbf", "linear", "poly", "precomputed", "sigmoid"] }, { "kind": "NamedType", @@ -235541,7 +235541,7 @@ "docstring": { "type": "{'scale', 'auto'} or float", "default_value": "'scale'", - "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\nThe default value of ``gamma`` changed from 'auto' to 'scale'." 
+ "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n 1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\n The default value of ``gamma`` changed from 'auto' to 'scale'." }, "type": { "kind": "UnionType", @@ -236540,7 +236540,7 @@ }, "type": { "kind": "EnumType", - "values": ["entropy", "log_loss", "gini"] + "values": ["entropy", "gini", "log_loss"] } }, { @@ -236557,7 +236557,7 @@ }, "type": { "kind": "EnumType", - "values": ["random", "best"] + "values": ["best", "random"] } }, { @@ -236587,7 +236587,7 @@ "docstring": { "type": "int or float", "default_value": "2", - "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n`ceil(min_samples_split * n_samples)` are the minimum\nnumber of samples for each split.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -236613,7 +236613,7 @@ "docstring": { "type": "int or float", "default_value": "1", - "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n`ceil(min_samples_leaf * n_samples)` are the minimum\nnumber of samples for each node.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -236656,14 +236656,14 @@ "docstring": { "type": "int, float or {\"auto\", \"sqrt\", \"log2\"}", "default_value": "None", - "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n`int(max_features * n_features)` features are considered at each\nsplit.\n- If \"auto\", then `max_features=sqrt(n_features)`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\n.. 
deprecated:: 1.1\nThe `\"auto\"` option was deprecated in 1.1 and will be removed\nin 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + "description": "The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n .. deprecated:: 1.1\n The `\"auto\"` option was deprecated in 1.1 and will be removed\n in 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["auto", "sqrt", "log2"] + "values": ["log2", "sqrt", "auto"] }, { "kind": "NamedType", @@ -236733,7 +236733,7 @@ "docstring": { "type": "float", "default_value": "0.0", - "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\nN_t / N * (impurity - N_t_R / N_t * right_impurity\n- N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" }, "type": { "kind": "NamedType", @@ -237108,11 +237108,11 @@ "docstring": { "type": "{\"squared_error\", \"friedman_mse\", \"absolute_error\", \"poisson\"}", "default_value": "\"squared_error\"", - "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion and minimizes the L2\nloss using the mean of each terminal node, \"friedman_mse\", which uses\nmean squared error with Friedman's improvement score for potential\nsplits, \"absolute_error\" for the mean absolute error, which minimizes\nthe L1 loss using the median of each terminal node, and \"poisson\" which\nuses reduction in Poisson deviance to find splits.\n\n.. versionadded:: 0.18\nMean Absolute Error (MAE) criterion.\n\n.. versionadded:: 0.24\nPoisson deviance criterion.\n\n.. deprecated:: 1.0\nCriterion \"mse\" was deprecated in v1.0 and will be removed in\nversion 1.2. 
Use `criterion=\"squared_error\"` which is equivalent.\n\n.. deprecated:: 1.0\nCriterion \"mae\" was deprecated in v1.0 and will be removed in\nversion 1.2. Use `criterion=\"absolute_error\"` which is equivalent." + "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion and minimizes the L2\nloss using the mean of each terminal node, \"friedman_mse\", which uses\nmean squared error with Friedman's improvement score for potential\nsplits, \"absolute_error\" for the mean absolute error, which minimizes\nthe L1 loss using the median of each terminal node, and \"poisson\" which\nuses reduction in Poisson deviance to find splits.\n\n.. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n.. versionadded:: 0.24\n Poisson deviance criterion.\n\n.. deprecated:: 1.0\n Criterion \"mse\" was deprecated in v1.0 and will be removed in\n version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n.. deprecated:: 1.0\n Criterion \"mae\" was deprecated in v1.0 and will be removed in\n version 1.2. Use `criterion=\"absolute_error\"` which is equivalent." }, "type": { "kind": "EnumType", - "values": ["absolute_error", "poisson", "squared_error", "friedman_mse"] + "values": ["squared_error", "friedman_mse", "absolute_error", "poisson"] } }, { @@ -237129,7 +237129,7 @@ }, "type": { "kind": "EnumType", - "values": ["random", "best"] + "values": ["best", "random"] } }, { @@ -237159,7 +237159,7 @@ "docstring": { "type": "int or float", "default_value": "2", - "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n`ceil(min_samples_split * n_samples)` are the minimum\nnumber of samples for each split.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -237185,7 +237185,7 @@ "docstring": { "type": "int or float", "default_value": "1", - "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n`ceil(min_samples_leaf * n_samples)` are the minimum\nnumber of samples for each node.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. 
This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -237228,14 +237228,14 @@ "docstring": { "type": "int, float or {\"auto\", \"sqrt\", \"log2\"}", "default_value": "None", - "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n`int(max_features * n_features)` features are considered at each\nsplit.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\n.. deprecated:: 1.1\nThe `\"auto\"` option was deprecated in 1.1 and will be removed\nin 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\n.. deprecated:: 1.1\n The `\"auto\"` option was deprecated in 1.1 and will be removed\n in 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["auto", "sqrt", "log2"] + "values": ["log2", "sqrt", "auto"] }, { "kind": "NamedType", @@ -237305,7 +237305,7 @@ "docstring": { "type": "float", "default_value": "0.0", - "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\nN_t / N * (impurity - N_t_R / N_t * right_impurity\n- N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. 
versionadded:: 0.19" }, "type": { "kind": "NamedType", @@ -237565,7 +237565,7 @@ }, "type": { "kind": "EnumType", - "values": ["entropy", "log_loss", "gini"] + "values": ["entropy", "gini", "log_loss"] } }, { @@ -237582,7 +237582,7 @@ }, "type": { "kind": "EnumType", - "values": ["random", "best"] + "values": ["best", "random"] } }, { @@ -237612,7 +237612,7 @@ "docstring": { "type": "int or float", "default_value": "2", - "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n`ceil(min_samples_split * n_samples)` are the minimum\nnumber of samples for each split.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -237638,7 +237638,7 @@ "docstring": { "type": "int or float", "default_value": "1", - "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n`ceil(min_samples_leaf * n_samples)` are the minimum\nnumber of samples for each node.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -237681,14 +237681,14 @@ "docstring": { "type": "int, float, {\"auto\", \"sqrt\", \"log2\"} or None", "default_value": "\"sqrt\"", - "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n`int(max_features * n_features)` features are considered at each\nsplit.\n- If \"auto\", then `max_features=sqrt(n_features)`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\n.. versionchanged:: 1.1\nThe default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n.. deprecated:: 1.1\nThe `\"auto\"` option was deprecated in 1.1 and will be removed\nin 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." 
+ "description": "The number of features to consider when looking for the best split:\n\n - If int, then consider `max_features` features at each split.\n - If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n - If \"auto\", then `max_features=sqrt(n_features)`.\n - If \"sqrt\", then `max_features=sqrt(n_features)`.\n - If \"log2\", then `max_features=log2(n_features)`.\n - If None, then `max_features=n_features`.\n\n .. versionchanged:: 1.1\n The default of `max_features` changed from `\"auto\"` to `\"sqrt\"`.\n\n .. deprecated:: 1.1\n The `\"auto\"` option was deprecated in 1.1 and will be removed\n in 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["auto", "sqrt", "log2"] + "values": ["log2", "sqrt", "auto"] }, { "kind": "NamedType", @@ -237762,7 +237762,7 @@ "docstring": { "type": "float", "default_value": "0.0", - "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\nN_t / N * (impurity - N_t_R / N_t * right_impurity\n- N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" }, "type": { "kind": "NamedType", @@ -237853,7 +237853,7 @@ "docstring": { "type": "{\"squared_error\", \"friedman_mse\"}", "default_value": "\"squared_error\"", - "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion and \"mae\" for the\nmean absolute error.\n\n.. versionadded:: 0.18\nMean Absolute Error (MAE) criterion.\n\n.. versionadded:: 0.24\nPoisson deviance criterion.\n\n.. deprecated:: 1.0\nCriterion \"mse\" was deprecated in v1.0 and will be removed in\nversion 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n.. deprecated:: 1.0\nCriterion \"mae\" was deprecated in v1.0 and will be removed in\nversion 1.2. Use `criterion=\"absolute_error\"` which is equivalent." + "description": "The function to measure the quality of a split. Supported criteria\nare \"squared_error\" for the mean squared error, which is equal to\nvariance reduction as feature selection criterion and \"mae\" for the\nmean absolute error.\n\n.. versionadded:: 0.18\n Mean Absolute Error (MAE) criterion.\n\n.. 
versionadded:: 0.24\n Poisson deviance criterion.\n\n.. deprecated:: 1.0\n Criterion \"mse\" was deprecated in v1.0 and will be removed in\n version 1.2. Use `criterion=\"squared_error\"` which is equivalent.\n\n.. deprecated:: 1.0\n Criterion \"mae\" was deprecated in v1.0 and will be removed in\n version 1.2. Use `criterion=\"absolute_error\"` which is equivalent." }, "type": { "kind": "EnumType", @@ -237874,7 +237874,7 @@ }, "type": { "kind": "EnumType", - "values": ["random", "best"] + "values": ["best", "random"] } }, { @@ -237904,7 +237904,7 @@ "docstring": { "type": "int or float", "default_value": "2", - "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n`ceil(min_samples_split * n_samples)` are the minimum\nnumber of samples for each split.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n `ceil(min_samples_split * n_samples)` are the minimum\n number of samples for each split.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -237930,7 +237930,7 @@ "docstring": { "type": "int or float", "default_value": "1", - "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n`ceil(min_samples_leaf * n_samples)` are the minimum\nnumber of samples for each node.\n\n.. versionchanged:: 0.18\nAdded float values for fractions." + "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n `ceil(min_samples_leaf * n_samples)` are the minimum\n number of samples for each node.\n\n.. versionchanged:: 0.18\n Added float values for fractions." }, "type": { "kind": "UnionType", @@ -237973,14 +237973,14 @@ "docstring": { "type": "int, float, {\"auto\", \"sqrt\", \"log2\"} or None", "default_value": "1.0", - "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n`int(max_features * n_features)` features are considered at each\nsplit.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\n.. versionchanged:: 1.1\nThe default of `max_features` changed from `\"auto\"` to `1.0`.\n\n.. 
deprecated:: 1.1\nThe `\"auto\"` option was deprecated in 1.1 and will be removed\nin 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." + "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n `int(max_features * n_features)` features are considered at each\n split.\n- If \"auto\", then `max_features=n_features`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\n.. versionchanged:: 1.1\n The default of `max_features` changed from `\"auto\"` to `1.0`.\n\n.. deprecated:: 1.1\n The `\"auto\"` option was deprecated in 1.1 and will be removed\n in 1.3.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features." }, "type": { "kind": "UnionType", "types": [ { "kind": "EnumType", - "values": ["auto", "sqrt", "log2"] + "values": ["log2", "sqrt", "auto"] }, { "kind": "NamedType", @@ -238037,7 +238037,7 @@ "docstring": { "type": "float", "default_value": "0.0", - "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\nN_t / N * (impurity - N_t_R / N_t * right_impurity\n- N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" + "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\n N_t / N * (impurity - N_t_R / N_t * right_impurity\n - N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19" }, "type": { "kind": "NamedType", @@ -239421,7 +239421,7 @@ "docstring": { "type": "object or str", "default_value": "None", - "description": "Handle or name of the output file. If ``None``, the result is\nreturned as a string.\n\n.. versionchanged:: 0.20\nDefault of out_file changed from \"tree.dot\" to None." + "description": "Handle or name of the output file. If ``None``, the result is\nreturned as a string.\n\n.. versionchanged:: 0.20\n Default of out_file changed from \"tree.dot\" to None." 
}, "type": { "kind": "UnionType", @@ -239511,7 +239511,7 @@ }, "type": { "kind": "EnumType", - "values": ["root", "none", "all"] + "values": ["all", "root", "none"] } }, { @@ -239688,7 +239688,7 @@ "results": [], "is_public": true, "reexported_by": ["sklearn/sklearn.tree"], - "description": "Export a decision tree in DOT format.\n\nThis function generates a GraphViz representation of the decision tree,\nwhich is then written into `out_file`. Once exported, graphical renderings\ncan be generated using, for example::\n\n$ dot -Tps tree.dot -o tree.ps (PostScript format)\n$ dot -Tpng tree.dot -o tree.png (PNG format)\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nRead more in the :ref:`User Guide `.", + "description": "Export a decision tree in DOT format.\n\nThis function generates a GraphViz representation of the decision tree,\nwhich is then written into `out_file`. Once exported, graphical renderings\ncan be generated using, for example::\n\n $ dot -Tps tree.dot -o tree.ps (PostScript format)\n $ dot -Tpng tree.dot -o tree.png (PNG format)\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nRead more in the :ref:`User Guide `.", "docstring": "Export a decision tree in DOT format.\n\nThis function generates a GraphViz representation of the decision tree,\nwhich is then written into `out_file`. Once exported, graphical renderings\ncan be generated using, for example::\n\n $ dot -Tps tree.dot -o tree.ps (PostScript format)\n $ dot -Tpng tree.dot -o tree.png (PNG format)\n\nThe sample counts that are shown are weighted with any sample_weights that\nmight be present.\n\nRead more in the :ref:`User Guide `.\n\nParameters\n----------\ndecision_tree : decision tree classifier\n The decision tree to be exported to GraphViz.\n\nout_file : object or str, default=None\n Handle or name of the output file. If ``None``, the result is\n returned as a string.\n\n .. versionchanged:: 0.20\n Default of out_file changed from \"tree.dot\" to None.\n\nmax_depth : int, default=None\n The maximum depth of the representation. 
If None, the tree is fully\n generated.\n\nfeature_names : list of str, default=None\n Names of each of the features.\n If None generic names will be used (\"feature_0\", \"feature_1\", ...).\n\nclass_names : list of str or bool, default=None\n Names of each of the target classes in ascending numerical order.\n Only relevant for classification and not supported for multi-output.\n If ``True``, shows a symbolic representation of the class name.\n\nlabel : {'all', 'root', 'none'}, default='all'\n Whether to show informative labels for impurity, etc.\n Options include 'all' to show at every node, 'root' to show only at\n the top root node, or 'none' to not show at any node.\n\nfilled : bool, default=False\n When set to ``True``, paint nodes to indicate majority class for\n classification, extremity of values for regression, or purity of node\n for multi-output.\n\nleaves_parallel : bool, default=False\n When set to ``True``, draw all leaf nodes at the bottom of the tree.\n\nimpurity : bool, default=True\n When set to ``True``, show the impurity at each node.\n\nnode_ids : bool, default=False\n When set to ``True``, show the ID number on each node.\n\nproportion : bool, default=False\n When set to ``True``, change the display of 'values' and/or 'samples'\n to be proportions and percentages respectively.\n\nrotate : bool, default=False\n When set to ``True``, orient tree left to right rather than top-down.\n\nrounded : bool, default=False\n When set to ``True``, draw node boxes with rounded corners.\n\nspecial_characters : bool, default=False\n When set to ``False``, ignore special characters for PostScript\n compatibility.\n\nprecision : int, default=3\n Number of digits of precision for floating point in the values of\n impurity, threshold and value attributes of each node.\n\nfontname : str, default='helvetica'\n Name of font used to render text.\n\nReturns\n-------\ndot_data : str\n String representation of the input tree in GraphViz dot format.\n Only returned if ``out_file`` is None.\n\n .. versionadded:: 0.18\n\nExamples\n--------\n>>> from sklearn.datasets import load_iris\n>>> from sklearn import tree\n\n>>> clf = tree.DecisionTreeClassifier()\n>>> iris = load_iris()\n\n>>> clf = clf.fit(iris.data, iris.target)\n>>> tree.export_graphviz(clf)\n'digraph Tree {..." }, { @@ -239912,7 +239912,7 @@ }, "type": { "kind": "EnumType", - "values": ["root", "none", "all"] + "values": ["all", "root", "none"] } }, { @@ -243535,7 +243535,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Pretty Printer class for estimator objects.\n\nThis extends the pprint.PrettyPrinter class, because:\n- we need estimators to be printed with their parameters, e.g.\nEstimator(param1=value1, ...) which is not supported by default.\n- the 'compact' parameter of PrettyPrinter is ignored for dicts, which\nmay lead to very long representations that we want to avoid.\n\nQuick overview of pprint.PrettyPrinter (see also\nhttps://stackoverflow.com/questions/49565047/pprint-with-hex-numbers):\n\n- the entry point is the _format() method which calls format() (overridden\nhere)\n- format() directly calls _safe_repr() for a first try at rendering the\nobject\n- _safe_repr formats the whole object recursively, only calling itself,\nnot caring about line length or anything\n- back to _format(), if the output string is too long, _format() then calls\nthe appropriate _pprint_TYPE() method (e.g. _pprint_list()) depending on\nthe type of the object. 
This where the line length and the compact\nparameters are taken into account.\n- those _pprint_TYPE() methods will internally use the format() method for\nrendering the nested objects of an object (e.g. the elements of a list)\n\nIn the end, everything has to be implemented twice: in _safe_repr and in\nthe custom _pprint_TYPE methods. Unfortunately PrettyPrinter is really not\nstraightforward to extend (especially when we want a compact output), so\nthe code is a bit convoluted.\n\nThis class overrides:\n- format() to support the changed_only parameter\n- _safe_repr to support printing of estimators (for when they fit on a\nsingle line)\n- _format_dict_items so that dict are correctly 'compacted'\n- _format_items so that ellipsis is used on long lists and tuples\n\nWhen estimators cannot be printed on a single line, the builtin _format()\nwill call _pprint_estimator() because it was registered to do so (see\n_dispatch[BaseEstimator.__repr__] = _pprint_estimator).\n\nboth _format_dict_items() and _pprint_estimator() use the\n_format_params_or_dict_items() method that will format parameters and\nkey-value pairs respecting the compact parameter. This method needs another\nsubroutine _pprint_key_val_tuple() used when a parameter or a key-value\npair is too long to fit on a single line. This subroutine is called in\n_format() and is registered as well in the _dispatch dict (just like\n_pprint_estimator). We had to create the two classes KeyValTuple and\nKeyValTupleParam for this.", + "description": "Pretty Printer class for estimator objects.\n\nThis extends the pprint.PrettyPrinter class, because:\n- we need estimators to be printed with their parameters, e.g.\n Estimator(param1=value1, ...) which is not supported by default.\n- the 'compact' parameter of PrettyPrinter is ignored for dicts, which\n may lead to very long representations that we want to avoid.\n\nQuick overview of pprint.PrettyPrinter (see also\nhttps://stackoverflow.com/questions/49565047/pprint-with-hex-numbers):\n\n- the entry point is the _format() method which calls format() (overridden\n here)\n- format() directly calls _safe_repr() for a first try at rendering the\n object\n- _safe_repr formats the whole object recursively, only calling itself,\n not caring about line length or anything\n- back to _format(), if the output string is too long, _format() then calls\n the appropriate _pprint_TYPE() method (e.g. _pprint_list()) depending on\n the type of the object. This where the line length and the compact\n parameters are taken into account.\n- those _pprint_TYPE() methods will internally use the format() method for\n rendering the nested objects of an object (e.g. the elements of a list)\n\nIn the end, everything has to be implemented twice: in _safe_repr and in\nthe custom _pprint_TYPE methods. 
Unfortunately PrettyPrinter is really not\nstraightforward to extend (especially when we want a compact output), so\nthe code is a bit convoluted.\n\nThis class overrides:\n- format() to support the changed_only parameter\n- _safe_repr to support printing of estimators (for when they fit on a\n single line)\n- _format_dict_items so that dict are correctly 'compacted'\n- _format_items so that ellipsis is used on long lists and tuples\n\nWhen estimators cannot be printed on a single line, the builtin _format()\nwill call _pprint_estimator() because it was registered to do so (see\n_dispatch[BaseEstimator.__repr__] = _pprint_estimator).\n\nboth _format_dict_items() and _pprint_estimator() use the\n_format_params_or_dict_items() method that will format parameters and\nkey-value pairs respecting the compact parameter. This method needs another\nsubroutine _pprint_key_val_tuple() used when a parameter or a key-value\npair is too long to fit on a single line. This subroutine is called in\n_format() and is registered as well in the _dispatch dict (just like\n_pprint_estimator). We had to create the two classes KeyValTuple and\nKeyValTupleParam for this.", "docstring": "" }, { @@ -245895,16 +245895,16 @@ "type": { "kind": "EnumType", "values": [ - "array", + "sparse", "series", + "slice", "sparse_csc", "dataframe", "list", "index", "tuple", - "sparse_csr", - "slice", - "sparse" + "array", + "sparse_csr" ] } }, @@ -246634,7 +246634,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Test that a certain warning occurs.\n\n.. deprecated:: 1.0\n`assert_warns` is deprecated in 1.0 and will be removed in 1.2.\nUse `pytest.warns` instead.", + "description": "Test that a certain warning occurs.\n\n.. deprecated:: 1.0\n `assert_warns` is deprecated in 1.0 and will be removed in 1.2.\n Use `pytest.warns` instead.", "docstring": "Test that a certain warning occurs.\n\n.. deprecated:: 1.0\n `assert_warns` is deprecated in 1.0 and will be removed in 1.2.\n Use `pytest.warns` instead.\n\nParameters\n----------\nwarning_class : the warning class\n The class to test for, e.g. UserWarning.\n\nfunc : callable\n Callable object to trigger warnings.\n\n*args : the positional arguments to `func`.\n\n**kw : the keyword arguments to `func`\n\nReturns\n-------\nresult : the return value of `func`" }, { @@ -246743,7 +246743,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Test that a certain warning occurs and with a certain message.\n\n.. deprecated:: 1.0\n`assert_warns_message` is deprecated in 1.0 and will be removed in 1.2.\nUse `pytest.warns` instead.", + "description": "Test that a certain warning occurs and with a certain message.\n\n.. deprecated:: 1.0\n `assert_warns_message` is deprecated in 1.0 and will be removed in 1.2.\n Use `pytest.warns` instead.", "docstring": "Test that a certain warning occurs and with a certain message.\n\n.. deprecated:: 1.0\n `assert_warns_message` is deprecated in 1.0 and will be removed in 1.2.\n Use `pytest.warns` instead.\n\nParameters\n----------\nwarning_class : the warning class\n The class to test for, e.g. UserWarning.\n\nmessage : str or callable\n The message or a substring of the message to test for. 
If callable,\n it takes a string as the argument and will trigger an AssertionError\n if the callable returns `False`.\n\nfunc : callable\n Callable object to trigger warnings.\n\n*args : the positional arguments to `func`.\n\n**kw : the keyword arguments to `func`.\n\nReturns\n-------\nresult : the return value of `func`" }, { @@ -249610,7 +249610,7 @@ "docstring": { "type": "estimator object", "default_value": "", - "description": "Estimator instance to check.\n\n.. versionadded:: 1.1\nPassing a class was deprecated in version 0.23, and support for\nclasses was removed in 0.24." + "description": "Estimator instance to check.\n\n.. versionadded:: 1.1\n Passing a class was deprecated in version 0.23, and support for\n classes was removed in 0.24." }, "type": { "kind": "NamedType", @@ -249644,7 +249644,7 @@ "docstring": { "type": "estimator object", "default_value": "", - "description": "Estimator instance to check.\n\n.. deprecated:: 1.1\n``Estimator`` was deprecated in favor of ``estimator`` in version 1.1\nand will be removed in version 1.3." + "description": "Estimator instance to check.\n\n.. deprecated:: 1.1\n ``Estimator`` was deprecated in favor of ``estimator`` in version 1.1\n and will be removed in version 1.3." }, "type": { "kind": "NamedType", @@ -252069,7 +252069,7 @@ "docstring": { "type": "list of estimators instances", "default_value": "", - "description": "Estimators to generated checks for.\n\n.. versionchanged:: 0.24\nPassing a class was deprecated in version 0.23, and support for\nclasses was removed in 0.24. Pass an instance instead.\n\n.. versionadded:: 0.24" + "description": "Estimators to generated checks for.\n\n.. versionchanged:: 0.24\n Passing a class was deprecated in version 0.23, and support for\n classes was removed in 0.24. Pass an instance instead.\n\n.. versionadded:: 0.24" }, "type": { "kind": "NamedType", @@ -252080,7 +252080,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Pytest specific decorator for parametrizing estimator checks.\n\nThe `id` of each check is set to be a pprint version of the estimator\nand the name of the check with its keyword arguments.\nThis allows to use `pytest -k` to specify which tests to run::\n\npytest test_check_estimators.py -k check_estimators_fit_returns_self", + "description": "Pytest specific decorator for parametrizing estimator checks.\n\nThe `id` of each check is set to be a pprint version of the estimator\nand the name of the check with its keyword arguments.\nThis allows to use `pytest -k` to specify which tests to run::\n\n pytest test_check_estimators.py -k check_estimators_fit_returns_self", "docstring": "Pytest specific decorator for parametrizing estimator checks.\n\nThe `id` of each check is set to be a pprint version of the estimator\nand the name of the check with its keyword arguments.\nThis allows to use `pytest -k` to specify which tests to run::\n\n pytest test_check_estimators.py -k check_estimators_fit_returns_self\n\nParameters\n----------\nestimators : list of estimators instances\n Estimators to generated checks for.\n\n .. versionchanged:: 0.24\n Passing a class was deprecated in version 0.23, and support for\n classes was removed in 0.24. Pass an instance instead.\n\n .. 
versionadded:: 0.24\n\nReturns\n-------\ndecorator : `pytest.mark.parametrize`\n\nSee Also\n--------\ncheck_estimator : Check if estimator adheres to scikit-learn conventions.\n\nExamples\n--------\n>>> from sklearn.utils.estimator_checks import parametrize_with_checks\n>>> from sklearn.linear_model import LogisticRegression\n>>> from sklearn.tree import DecisionTreeRegressor\n\n>>> @parametrize_with_checks([LogisticRegression(),\n... DecisionTreeRegressor()])\n... def test_sklearn_compatible_estimator(estimator, check):\n... check(estimator)" }, { @@ -252326,7 +252326,7 @@ }, "type": { "kind": "EnumType", - "values": ["LU", "none", "auto", "QR"] + "values": ["LU", "none", "QR", "auto"] } }, { @@ -252343,7 +252343,7 @@ }, "type": { "kind": "EnumType", - "values": ["module", "value"] + "values": ["value", "module"] } }, { @@ -252625,7 +252625,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Compute the log of the logistic function, ``log(1 / (1 + e ** -x))``.\n\nThis implementation is numerically stable because it splits positive and\nnegative values::\n\n-log(1 + exp(-x_i)) if x_i > 0\nx_i - log(1 + exp(x_i)) if x_i <= 0\n\nFor the ordinary logistic function, use ``scipy.special.expit``.", + "description": "Compute the log of the logistic function, ``log(1 / (1 + e ** -x))``.\n\nThis implementation is numerically stable because it splits positive and\nnegative values::\n\n -log(1 + exp(-x_i)) if x_i > 0\n x_i - log(1 + exp(x_i)) if x_i <= 0\n\nFor the ordinary logistic function, use ``scipy.special.expit``.", "docstring": "Compute the log of the logistic function, ``log(1 / (1 + e ** -x))``.\n\nThis implementation is numerically stable because it splits positive and\nnegative values::\n\n -log(1 + exp(-x_i)) if x_i > 0\n x_i - log(1 + exp(x_i)) if x_i <= 0\n\nFor the ordinary logistic function, use ``scipy.special.expit``.\n\nParameters\n----------\nX : array-like of shape (M, N) or (M,)\n Argument to the logistic function.\n\nout : array-like of shape (M, N) or (M,), default=None\n Preallocated output array.\n\nReturns\n-------\nout : ndarray of shape (M, N) or (M,)\n Log of the logistic function evaluated at every point in x.\n\nNotes\n-----\nSee the blog post describing this implementation:\nhttp://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/" }, { @@ -252746,7 +252746,7 @@ }, "type": { "kind": "EnumType", - "values": ["LU", "none", "auto", "QR"] + "values": ["LU", "none", "QR", "auto"] } }, { @@ -252883,7 +252883,7 @@ }, "type": { "kind": "EnumType", - "values": ["LU", "none", "auto", "QR"] + "values": ["LU", "none", "QR", "auto"] } }, { @@ -252939,7 +252939,7 @@ "docstring": { "type": "int, RandomState instance or None", "default_value": "'warn'", - "description": "The seed of the pseudo random number generator to use when\nshuffling the data, i.e. getting the random vectors to initialize\nthe algorithm. Pass an int for reproducible results across multiple\nfunction calls. See :term:`Glossary `.\n\n.. versionchanged:: 1.2\nThe previous behavior (`random_state=0`) is deprecated, and\nfrom v1.2 the default value will be `random_state=None`. Set\nthe value of `random_state` explicitly to suppress the deprecation\nwarning." + "description": "The seed of the pseudo random number generator to use when\nshuffling the data, i.e. getting the random vectors to initialize\nthe algorithm. Pass an int for reproducible results across multiple\nfunction calls. See :term:`Glossary `.\n\n.. 
versionchanged:: 1.2\n The previous behavior (`random_state=0`) is deprecated, and\n from v1.2 the default value will be `random_state=None`. Set\n the value of `random_state` explicitly to suppress the deprecation\n warning." }, "type": { "kind": "UnionType", @@ -253735,7 +253735,7 @@ }, "type": { "kind": "EnumType", - "values": ["distance", "connectivity"] + "values": ["connectivity", "distance"] } }, { @@ -253845,7 +253845,7 @@ }, "type": { "kind": "EnumType", - "values": ["D", "auto", "FW"] + "values": ["FW", "D", "auto"] } } ], @@ -254612,7 +254612,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Create a decorator for methods that are delegated to a sub-estimator\n\nThis enables ducktyping by hasattr returning True according to the\nsub-estimator.\n\n.. deprecated:: 1.3\n`if_delegate_has_method` is deprecated in version 1.1 and will be removed in\nversion 1.3. Use `available_if` instead.", + "description": "Create a decorator for methods that are delegated to a sub-estimator\n\nThis enables ducktyping by hasattr returning True according to the\nsub-estimator.\n\n.. deprecated:: 1.3\n `if_delegate_has_method` is deprecated in version 1.1 and will be removed in\n version 1.3. Use `available_if` instead.", "docstring": "Create a decorator for methods that are delegated to a sub-estimator\n\nThis enables ducktyping by hasattr returning True according to the\nsub-estimator.\n\n.. deprecated:: 1.3\n `if_delegate_has_method` is deprecated in version 1.1 and will be removed in\n version 1.3. Use `available_if` instead.\n\nParameters\n----------\ndelegate : str, list of str or tuple of str\n Name of the sub-estimator that can be accessed as an attribute of the\n base object. If a list or a tuple of names are provided, the first\n sub-estimator that is an attribute of the base object will be used." }, { @@ -254828,7 +254828,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Ensure that target y is of a non-regression type.\n\nOnly the following target types (as defined in type_of_target) are allowed:\n'binary', 'multiclass', 'multiclass-multioutput',\n'multilabel-indicator', 'multilabel-sequences'", + "description": "Ensure that target y is of a non-regression type.\n\nOnly the following target types (as defined in type_of_target) are allowed:\n 'binary', 'multiclass', 'multiclass-multioutput',\n 'multilabel-indicator', 'multilabel-sequences'", "docstring": "Ensure that target y is of a non-regression type.\n\nOnly the following target types (as defined in type_of_target) are allowed:\n 'binary', 'multiclass', 'multiclass-multioutput',\n 'multilabel-indicator', 'multilabel-sequences'\n\nParameters\n----------\ny : array-like\n Target values." 
}, { @@ -254961,7 +254961,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Determine the type of data indicated by the target.\n\nNote that this type is the most specific type that can be inferred.\nFor example:\n\n* ``binary`` is more specific but compatible with ``multiclass``.\n* ``multiclass`` of integers is more specific but compatible with\n``continuous``.\n* ``multilabel-indicator`` is more specific but compatible with\n``multiclass-multioutput``.", + "description": "Determine the type of data indicated by the target.\n\nNote that this type is the most specific type that can be inferred.\nFor example:\n\n * ``binary`` is more specific but compatible with ``multiclass``.\n * ``multiclass`` of integers is more specific but compatible with\n ``continuous``.\n * ``multilabel-indicator`` is more specific but compatible with\n ``multiclass-multioutput``.", "docstring": "Determine the type of data indicated by the target.\n\nNote that this type is the most specific type that can be inferred.\nFor example:\n\n * ``binary`` is more specific but compatible with ``multiclass``.\n * ``multiclass`` of integers is more specific but compatible with\n ``continuous``.\n * ``multilabel-indicator`` is more specific but compatible with\n ``multiclass-multioutput``.\n\nParameters\n----------\ny : array-like\n\ninput_name : str, default=\"\"\n The data name used to construct the error message.\n\n .. versionadded:: 1.1.0\n\nReturns\n-------\ntarget_type : str\n One of:\n\n * 'continuous': `y` is an array-like of floats that are not all\n integers, and is 1d or a column vector.\n * 'continuous-multioutput': `y` is a 2d array of floats that are\n not all integers, and both dimensions are of size > 1.\n * 'binary': `y` contains <= 2 discrete values and is 1d or a column\n vector.\n * 'multiclass': `y` contains more than two discrete values, is not a\n sequence of sequences, and is 1d or a column vector.\n * 'multiclass-multioutput': `y` is a 2d array that contains more\n than two discrete values, is not a sequence of sequences, and both\n dimensions are of size > 1.\n * 'multilabel-indicator': `y` is a label indicator matrix, an array\n of two dimensions with at least two columns, and at most 2 unique\n values.\n * 'unknown': `y` is array-like but none of the above, such as a 3d\n array, sequence of sequences, or an array of non-sequence objects.\n\nExamples\n--------\n>>> from sklearn.utils.multiclass import type_of_target\n>>> import numpy as np\n>>> type_of_target([0.1, 0.6])\n'continuous'\n>>> type_of_target([1, -1, -1, 1])\n'binary'\n>>> type_of_target(['a', 'b', 'a'])\n'binary'\n>>> type_of_target([1.0, 2.0])\n'binary'\n>>> type_of_target([1, 0, 2])\n'multiclass'\n>>> type_of_target([1.0, 0.0, 3.0])\n'multiclass'\n>>> type_of_target(['a', 'b', 'c'])\n'multiclass'\n>>> type_of_target(np.array([[1, 2], [3, 1]]))\n'multiclass-multioutput'\n>>> type_of_target([[1, 2]])\n'multilabel-indicator'\n>>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))\n'continuous-multioutput'\n>>> type_of_target(np.array([[0, 1], [1, 1]]))\n'multilabel-indicator'" }, { @@ -254991,7 +254991,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Extract an ordered array of unique labels.\n\nWe don't allow:\n- mix of multilabel and multiclass (single label) targets\n- mix of label indicator matrix and anything else,\nbecause there are no explicit labels)\n- mix of label indicator matrices of different sizes\n- mix of string and integer labels\n\nAt the moment, we also don't 
allow \"multiclass-multioutput\" input type.", + "description": "Extract an ordered array of unique labels.\n\nWe don't allow:\n - mix of multilabel and multiclass (single label) targets\n - mix of label indicator matrix and anything else,\n because there are no explicit labels)\n - mix of label indicator matrices of different sizes\n - mix of string and integer labels\n\nAt the moment, we also don't allow \"multiclass-multioutput\" input type.", "docstring": "Extract an ordered array of unique labels.\n\nWe don't allow:\n - mix of multilabel and multiclass (single label) targets\n - mix of label indicator matrix and anything else,\n because there are no explicit labels)\n - mix of label indicator matrices of different sizes\n - mix of string and integer labels\n\nAt the moment, we also don't allow \"multiclass-multioutput\" input type.\n\nParameters\n----------\n*ys : array-likes\n\nReturns\n-------\nout : ndarray of shape (n_unique_labels,)\n An ordered array of unique labels.\n\nExamples\n--------\n>>> from sklearn.utils.multiclass import unique_labels\n>>> unique_labels([3, 5, 5, 5, 7, 7])\narray([3, 5, 7])\n>>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4])\narray([1, 2, 3, 4])\n>>> unique_labels([1, 2, 10], [5, 11])\narray([ 1, 2, 5, 10, 11])" }, { @@ -256870,7 +256870,7 @@ "results": [], "is_public": true, "reexported_by": [], - "description": "Compute minimium and maximum along an axis on a CSR or CSC matrix.\n\nOptionally ignore NaN values.", + "description": "Compute minimium and maximum along an axis on a CSR or CSC matrix.\n\n Optionally ignore NaN values.", "docstring": "Compute minimium and maximum along an axis on a CSR or CSC matrix.\n\n Optionally ignore NaN values.\n\nParameters\n----------\nX : sparse matrix of shape (n_samples, n_features)\n Input data. It should be of CSR or CSC format.\n\naxis : {0, 1}\n Axis along which the axis should be computed.\n\nignore_nan : bool, default=False\n Ignore or passing through NaN values.\n\n .. versionadded:: 0.20\n\nReturns\n-------\n\nmins : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Feature-wise minima.\n\nmaxs : ndarray of shape (n_features,), dtype={np.float32, np.float64}\n Feature-wise maxima." }, { @@ -256937,7 +256937,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Compute weighted percentile\n\nComputes lower weighted percentile. If `array` is a 2D array, the\n`percentile` is computed along the axis 0.\n\n.. versionchanged:: 0.24\nAccepts 2D `array`.", + "description": "Compute weighted percentile\n\nComputes lower weighted percentile. If `array` is a 2D array, the\n`percentile` is computed along the axis 0.\n\n .. versionchanged:: 0.24\n Accepts 2D `array`.", "docstring": "Compute weighted percentile\n\nComputes lower weighted percentile. If `array` is a 2D array, the\n`percentile` is computed along the axis 0.\n\n .. versionchanged:: 0.24\n Accepts 2D `array`.\n\nParameters\n----------\narray : 1D or 2D array\n Values to take the weighted percentile of.\n\nsample_weight: 1D or 2D array\n Weights for each value in `array`. Must be same shape as `array` or\n of shape `(array.shape[0],)`.\n\npercentile: int or float, default=50\n Percentile to compute. Must be value between 0 and 100.\n\nReturns\n-------\npercentile : int if `array` 1D, ndarray if `array` 2D\n Weighted percentile." 
}, { @@ -257161,7 +257161,7 @@ "docstring": { "type": "array-like of str or None", "default_value": "None", - "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\nused as feature names in. If `feature_names_in_` is not defined,\nthen the following input feature names are generated:\n`[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\nmatch `feature_names_in_` if `feature_names_in_` is defined." + "description": "Input features.\n\n- If `input_features` is `None`, then `feature_names_in_` is\n used as feature names in. If `feature_names_in_` is not defined,\n then the following input feature names are generated:\n `[\"x0\", \"x1\", ..., \"x(n_features_in_ - 1)\"]`.\n- If `input_features` is an array-like, then `input_features` must\n match `feature_names_in_` if `feature_names_in_` is defined." }, "type": { "kind": "UnionType", @@ -257350,7 +257350,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Check the eigenvalues of a positive semidefinite (PSD) matrix.\n\nChecks the provided array of PSD matrix eigenvalues for numerical or\nconditioning issues and returns a fixed validated version. This method\nshould typically be used if the PSD matrix is user-provided (e.g. a\nGram matrix) or computed using a user-provided dissimilarity metric\n(e.g. kernel function), or if the decomposition process uses approximation\nmethods (randomized SVD, etc.).\n\nIt checks for three things:\n\n- that there are no significant imaginary parts in eigenvalues (more than\n1e-5 times the maximum real part). If this check fails, it raises a\n``ValueError``. Otherwise all non-significant imaginary parts that may\nremain are set to zero. This operation is traced with a\n``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\n- that eigenvalues are not all negative. If this check fails, it raises a\n``ValueError``\n\n- that there are no significant negative eigenvalues with absolute value\nmore than 1e-10 (1e-6) and more than 1e-5 (5e-3) times the largest\npositive eigenvalue in double (simple) precision. If this check fails,\nit raises a ``ValueError``. Otherwise all negative eigenvalues that may\nremain are set to zero. This operation is traced with a\n``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\nFinally, all the positive eigenvalues that are too small (with a value\nsmaller than the maximum eigenvalue multiplied by 1e-12 (2e-7)) are set to\nzero. This operation is traced with a ``PositiveSpectrumWarning`` when\n``enable_warnings=True``.", + "description": "Check the eigenvalues of a positive semidefinite (PSD) matrix.\n\nChecks the provided array of PSD matrix eigenvalues for numerical or\nconditioning issues and returns a fixed validated version. This method\nshould typically be used if the PSD matrix is user-provided (e.g. a\nGram matrix) or computed using a user-provided dissimilarity metric\n(e.g. kernel function), or if the decomposition process uses approximation\nmethods (randomized SVD, etc.).\n\nIt checks for three things:\n\n- that there are no significant imaginary parts in eigenvalues (more than\n 1e-5 times the maximum real part). If this check fails, it raises a\n ``ValueError``. Otherwise all non-significant imaginary parts that may\n remain are set to zero. This operation is traced with a\n ``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\n- that eigenvalues are not all negative. 
If this check fails, it raises a\n ``ValueError``\n\n- that there are no significant negative eigenvalues with absolute value\n more than 1e-10 (1e-6) and more than 1e-5 (5e-3) times the largest\n positive eigenvalue in double (simple) precision. If this check fails,\n it raises a ``ValueError``. Otherwise all negative eigenvalues that may\n remain are set to zero. This operation is traced with a\n ``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\nFinally, all the positive eigenvalues that are too small (with a value\nsmaller than the maximum eigenvalue multiplied by 1e-12 (2e-7)) are set to\nzero. This operation is traced with a ``PositiveSpectrumWarning`` when\n``enable_warnings=True``.", "docstring": "Check the eigenvalues of a positive semidefinite (PSD) matrix.\n\nChecks the provided array of PSD matrix eigenvalues for numerical or\nconditioning issues and returns a fixed validated version. This method\nshould typically be used if the PSD matrix is user-provided (e.g. a\nGram matrix) or computed using a user-provided dissimilarity metric\n(e.g. kernel function), or if the decomposition process uses approximation\nmethods (randomized SVD, etc.).\n\nIt checks for three things:\n\n- that there are no significant imaginary parts in eigenvalues (more than\n 1e-5 times the maximum real part). If this check fails, it raises a\n ``ValueError``. Otherwise all non-significant imaginary parts that may\n remain are set to zero. This operation is traced with a\n ``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\n- that eigenvalues are not all negative. If this check fails, it raises a\n ``ValueError``\n\n- that there are no significant negative eigenvalues with absolute value\n more than 1e-10 (1e-6) and more than 1e-5 (5e-3) times the largest\n positive eigenvalue in double (simple) precision. If this check fails,\n it raises a ``ValueError``. Otherwise all negative eigenvalues that may\n remain are set to zero. This operation is traced with a\n ``PositiveSpectrumWarning`` when ``enable_warnings=True``.\n\nFinally, all the positive eigenvalues that are too small (with a value\nsmaller than the maximum eigenvalue multiplied by 1e-12 (2e-7)) are set to\nzero. This operation is traced with a ``PositiveSpectrumWarning`` when\n``enable_warnings=True``.\n\nParameters\n----------\nlambdas : array-like of shape (n_eigenvalues,)\n Array of eigenvalues to check / fix.\n\nenable_warnings : bool, default=False\n When this is set to ``True``, a ``PositiveSpectrumWarning`` will be\n raised when there are imaginary parts, negative eigenvalues, or\n extremely small non-zero eigenvalues. Otherwise no warning will be\n raised. In both cases, imaginary parts, negative eigenvalues, and\n extremely small non-zero eigenvalues will be set to zero.\n\nReturns\n-------\nlambdas_fixed : ndarray of shape (n_eigenvalues,)\n A fixed validated copy of the array of eigenvalues.\n\nExamples\n--------\n>>> from sklearn.utils.validation import _check_psd_eigenvalues\n>>> _check_psd_eigenvalues([1, 2]) # nominal case\narray([1, 2])\n>>> _check_psd_eigenvalues([5, 5j]) # significant imag part\nTraceback (most recent call last):\n ...\nValueError: There are significant imaginary parts in eigenvalues (1\n of the maximum real part). 
Either the matrix is not PSD, or there was\n an issue while computing the eigendecomposition of the matrix.\n>>> _check_psd_eigenvalues([5, 5e-5j]) # insignificant imag part\narray([5., 0.])\n>>> _check_psd_eigenvalues([-5, -1]) # all negative\nTraceback (most recent call last):\n ...\nValueError: All eigenvalues are negative (maximum is -1). Either the\n matrix is not PSD, or there was an issue while computing the\n eigendecomposition of the matrix.\n>>> _check_psd_eigenvalues([5, -1]) # significant negative\nTraceback (most recent call last):\n ...\nValueError: There are significant negative eigenvalues (0.2 of the\n maximum positive). Either the matrix is not PSD, or there was an issue\n while computing the eigendecomposition of the matrix.\n>>> _check_psd_eigenvalues([5, -5e-5]) # insignificant negative\narray([5., 0.])\n>>> _check_psd_eigenvalues([5, 4e-12]) # bad conditioning (too small)\narray([5., 0.])" }, { @@ -257457,7 +257457,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Validate sample weights.\n\nNote that passing sample_weight=None will output an array of ones.\nTherefore, in some cases, you may want to protect the call with:\nif sample_weight is not None:\nsample_weight = _check_sample_weight(...)", + "description": "Validate sample weights.\n\nNote that passing sample_weight=None will output an array of ones.\nTherefore, in some cases, you may want to protect the call with:\nif sample_weight is not None:\n sample_weight = _check_sample_weight(...)", "docstring": "Validate sample weights.\n\nNote that passing sample_weight=None will output an array of ones.\nTherefore, in some cases, you may want to protect the call with:\nif sample_weight is not None:\n sample_weight = _check_sample_weight(...)\n\nParameters\n----------\nsample_weight : {ndarray, Number or None}, shape (n_samples,)\n Input sample weights.\n\nX : {ndarray, list, sparse matrix}\n Input data.\n\nonly_non_negative : bool, default=False,\n Whether or not the weights are expected to be non-negative.\n\n .. versionadded:: 1.0\n\ndtype : dtype, default=None\n dtype of the validated `sample_weight`.\n If None, and the input `sample_weight` is an array, the dtype of the\n input is preserved; otherwise an array with the default numpy dtype\n is be allocated. If `dtype` is not one of `float32`, `float64`,\n `None`, the output will be of dtype `float64`.\n\ncopy : bool, default=False\n If True, a copy of sample_weight will be created.\n\nReturns\n-------\nsample_weight : ndarray of shape (n_samples,)\n Validated sample weight. It is guaranteed to be \"C\" contiguous." }, { @@ -257713,7 +257713,7 @@ "docstring": { "type": "bool or 'allow-nan'", "default_value": "", - "description": "Whether to raise an error on np.inf, np.nan, pd.NA in X. The\npossibilities are:\n\n- True: Force all values of X to be finite.\n- False: accepts np.inf, np.nan, pd.NA in X.\n- 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot\nbe infinite.\n\n.. versionadded:: 0.20\n``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\nAccepts `pd.NA` and converts it into `np.nan`" + "description": "Whether to raise an error on np.inf, np.nan, pd.NA in X. The\npossibilities are:\n\n- True: Force all values of X to be finite.\n- False: accepts np.inf, np.nan, pd.NA in X.\n- 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot\n be infinite.\n\n.. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. 
versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`" }, "type": { "kind": "UnionType", @@ -257870,7 +257870,7 @@ "docstring": { "type": "{ndarray, dataframe} of shape (n_samples, n_features)", "default_value": "", - "description": "Array container to extract feature names.\n\n- pandas dataframe : The columns will be considered to be feature\nnames. If the dataframe contains non-string feature names, `None` is\nreturned.\n- All other array containers will return `None`." + "description": "Array container to extract feature names.\n\n- pandas dataframe : The columns will be considered to be feature\n names. If the dataframe contains non-string feature names, `None` is\n returned.\n- All other array containers will return `None`." }, "type": { "kind": "UnionType", @@ -258010,7 +258010,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Return the number of features in an array-like X.\n\nThis helper function tries hard to avoid to materialize an array version\nof X unless necessary. For instance, if X is a list of lists,\nthis function will return the length of the first element, assuming\nthat subsequent elements are all lists of the same length without\nchecking.\nParameters\n----------\nX : array-like\narray-like to get the number of features.", + "description": "Return the number of features in an array-like X.\n\nThis helper function tries hard to avoid to materialize an array version\nof X unless necessary. For instance, if X is a list of lists,\nthis function will return the length of the first element, assuming\nthat subsequent elements are all lists of the same length without\nchecking.\nParameters\n----------\nX : array-like\n array-like to get the number of features.", "docstring": "Return the number of features in an array-like X.\n\nThis helper function tries hard to avoid to materialize an array version\nof X unless necessary. For instance, if X is a list of lists,\nthis function will return the length of the first element, assuming\nthat subsequent elements are all lists of the same length without\nchecking.\nParameters\n----------\nX : array-like\n array-like to get the number of features.\n\nReturns\n-------\nfeatures : int\n Number of features" }, { @@ -258117,7 +258117,7 @@ "docstring": { "type": "bool or 'allow-nan'", "default_value": "True", - "description": "Whether to raise an error on np.inf, np.nan, pd.NA in X. The\npossibilities are:\n\n- True: Force all values of X to be finite.\n- False: accepts np.inf, np.nan, pd.NA in X.\n- 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot\nbe infinite.\n\n.. versionadded:: 0.20\n``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\nAccepts `pd.NA` and converts it into `np.nan`" + "description": "Whether to raise an error on np.inf, np.nan, pd.NA in X. The\npossibilities are:\n\n- True: Force all values of X to be finite.\n- False: accepts np.inf, np.nan, pd.NA in X.\n- 'allow-nan': accepts only np.nan and pd.NA values in X. Values cannot\n be infinite.\n\n.. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`" }, "type": { "kind": "UnionType", @@ -258356,7 +258356,7 @@ }, "type": { "kind": "EnumType", - "values": ["C", "F"] + "values": ["F", "C"] } }, { @@ -258386,7 +258386,7 @@ "docstring": { "type": "bool or 'allow-nan'", "default_value": "True", - "description": "Whether to raise an error on np.inf, np.nan, pd.NA in X. 
This parameter\ndoes not influence whether y can have np.inf, np.nan, pd.NA values.\nThe possibilities are:\n\n- True: Force all values of X to be finite.\n- False: accepts np.inf, np.nan, pd.NA in X.\n- 'allow-nan': accepts only np.nan or pd.NA values in X. Values cannot\nbe infinite.\n\n.. versionadded:: 0.20\n``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\nAccepts `pd.NA` and converts it into `np.nan`" + "description": "Whether to raise an error on np.inf, np.nan, pd.NA in X. This parameter\ndoes not influence whether y can have np.inf, np.nan, pd.NA values.\nThe possibilities are:\n\n- True: Force all values of X to be finite.\n- False: accepts np.inf, np.nan, pd.NA in X.\n- 'allow-nan': accepts only np.nan or pd.NA values in X. Values cannot\n be infinite.\n\n.. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`" }, "type": { "kind": "UnionType", @@ -258658,7 +258658,7 @@ "types": [ { "kind": "EnumType", - "values": ["C", "F"] + "values": ["F", "C"] }, { "kind": "NamedType", @@ -258694,7 +258694,7 @@ "docstring": { "type": "bool or 'allow-nan'", "default_value": "True", - "description": "Whether to raise an error on np.inf, np.nan, pd.NA in array. The\npossibilities are:\n\n- True: Force all values of array to be finite.\n- False: accepts np.inf, np.nan, pd.NA in array.\n- 'allow-nan': accepts only np.nan and pd.NA values in array. Values\ncannot be infinite.\n\n.. versionadded:: 0.20\n``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\nAccepts `pd.NA` and converts it into `np.nan`" + "description": "Whether to raise an error on np.inf, np.nan, pd.NA in array. The\npossibilities are:\n\n- True: Force all values of array to be finite.\n- False: accepts np.inf, np.nan, pd.NA in array.\n- 'allow-nan': accepts only np.nan and pd.NA values in array. Values\n cannot be infinite.\n\n.. versionadded:: 0.20\n ``force_all_finite`` accepts the string ``'allow-nan'``.\n\n.. versionchanged:: 0.23\n Accepts `pd.NA` and converts it into `np.nan`" }, "type": { "kind": "UnionType", @@ -259231,11 +259231,11 @@ "docstring": { "type": "{\"left\", \"right\", \"both\", \"neither\"}", "default_value": "\"both\"", - "description": "Whether the interval defined by `min_val` and `max_val` should include\nthe boundaries. Possible choices are:\n\n- `\"left\"`: only `min_val` is included in the valid interval.\nIt is equivalent to the interval `[ min_val, max_val )`.\n- `\"right\"`: only `max_val` is included in the valid interval.\nIt is equivalent to the interval `( min_val, max_val ]`.\n- `\"both\"`: `min_val` and `max_val` are included in the valid interval.\nIt is equivalent to the interval `[ min_val, max_val ]`.\n- `\"neither\"`: neither `min_val` nor `max_val` are included in the\nvalid interval. It is equivalent to the interval `( min_val, max_val )`." + "description": "Whether the interval defined by `min_val` and `max_val` should include\nthe boundaries. 
Possible choices are:\n\n- `\"left\"`: only `min_val` is included in the valid interval.\n It is equivalent to the interval `[ min_val, max_val )`.\n- `\"right\"`: only `max_val` is included in the valid interval.\n It is equivalent to the interval `( min_val, max_val ]`.\n- `\"both\"`: `min_val` and `max_val` are included in the valid interval.\n It is equivalent to the interval `[ min_val, max_val ]`.\n- `\"neither\"`: neither `min_val` nor `max_val` are included in the\n valid interval. It is equivalent to the interval `( min_val, max_val )`." }, "type": { "kind": "EnumType", - "values": ["neither", "right", "both", "left"] + "values": ["both", "left", "neither", "right"] } } ], @@ -260056,7 +260056,7 @@ "docstring": { "type": "bool, int, str, slice, array-like", "default_value": "", - "description": "- If `axis=0`, boolean and integer array-like, integer slice,\nand scalar integer are supported.\n- If `axis=1`:\n- to select a single column, `indices` can be of `int` type for\nall `X` types and `str` only for dataframe. The selected subset\nwill be 1D, unless `X` is a sparse matrix in which case it will\nbe 2D.\n- to select multiples columns, `indices` can be one of the\nfollowing: `list`, `array`, `slice`. The type used in\nthese containers can be one of the following: `int`, 'bool' and\n`str`. However, `str` is only supported when `X` is a dataframe.\nThe selected subset will be 2D." + "description": "- If `axis=0`, boolean and integer array-like, integer slice,\n and scalar integer are supported.\n- If `axis=1`:\n - to select a single column, `indices` can be of `int` type for\n all `X` types and `str` only for dataframe. The selected subset\n will be 1D, unless `X` is a sparse matrix in which case it will\n be 2D.\n - to select multiples columns, `indices` can be one of the\n following: `list`, `array`, `slice`. The type used in\n these containers can be one of the following: `int`, 'bool' and\n `str`. However, `str` is only supported when `X` is a dataframe.\n The selected subset will be 2D." }, "type": { "kind": "UnionType", @@ -260105,7 +260105,7 @@ "results": [], "is_public": false, "reexported_by": [], - "description": "Return rows, items or columns of X using indices.\n\n.. warning::\n\nThis utility is documented, but **private**. This means that\nbackward compatibility might be broken without any deprecation\ncycle.", + "description": "Return rows, items or columns of X using indices.\n\n.. warning::\n\n This utility is documented, but **private**. This means that\n backward compatibility might be broken without any deprecation\n cycle.", "docstring": "Return rows, items or columns of X using indices.\n\n.. warning::\n\n This utility is documented, but **private**. This means that\n backward compatibility might be broken without any deprecation\n cycle.\n\nParameters\n----------\nX : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series\n Data from which to sample rows, items or columns. `list` are only\n supported when `axis=0`.\nindices : bool, int, str, slice, array-like\n - If `axis=0`, boolean and integer array-like, integer slice,\n and scalar integer are supported.\n - If `axis=1`:\n - to select a single column, `indices` can be of `int` type for\n all `X` types and `str` only for dataframe. The selected subset\n will be 1D, unless `X` is a sparse matrix in which case it will\n be 2D.\n - to select multiples columns, `indices` can be one of the\n following: `list`, `array`, `slice`. 
The type used in\n these containers can be one of the following: `int`, 'bool' and\n `str`. However, `str` is only supported when `X` is a dataframe.\n The selected subset will be 2D.\naxis : int, default=0\n The axis along which `X` will be subsampled. `axis=0` will select\n rows while `axis=1` will select columns.\n\nReturns\n-------\nsubset\n Subset of X on axis 0 or 1.\n\nNotes\n-----\nCSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are\nnot supported." }, { @@ -260161,7 +260161,7 @@ "types": [ { "kind": "EnumType", - "values": ["transformer", "regressor", "cluster", "classifier"] + "values": ["transformer", "classifier", "regressor", "cluster"] }, { "kind": "NamedType",